
Commit a7f3b5f
[SPARK-52460][SQL] Store internal TIME values as nanoseconds
### What changes were proposed in this pull request?

In this PR, I propose to store internal TIME values as nanoseconds since midnight in a `Long`, instead of microseconds in a `Long`.

### Why are the changes needed?

A `Long` with nanosecond precision can hold the full range of TIME values, from 0 to 24 * 60 * 60 * 1000 * 1000 * 1000 - 1, which is well below the maximum of `Long`. This will simplify support of `TIME(9)`, for example.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

By running the affected test suites:

```
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
$ build/sbt "test:testOnly *TimeExpressionsSuite"
$ build/sbt "test:testOnly *TimeFormatterSuite"
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51156 from MaxGekk/time-nanos.

Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
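
As a quick sanity check of the range claim above, here is the arithmetic (illustrative only, not part of the commit):

```scala
// A TIME value spans [0, nanoseconds-per-day), which fits easily in a signed 64-bit Long.
val nanosPerDay: Long = 24L * 60 * 60 * 1000 * 1000 * 1000 // 86,400,000,000,000
val maxTimeValue: Long = nanosPerDay - 1                    // largest valid TIME value
// Long.MaxValue is 9,223,372,036,854,775,807, roughly five orders of magnitude larger,
// so nanosecond precision (TIME(9)) needs no wider representation.
assert(maxTimeValue < Long.MaxValue)
```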
1 parent: 2e379be

File tree: 22 files changed, +165 -100 lines changed

sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala

Lines changed: 18 additions & 9 deletions

```diff
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 import java.lang.invoke.{MethodHandles, MethodType}
 import java.sql.{Date, Timestamp}
 import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZonedDateTime, ZoneId, ZoneOffset}
-import java.time.temporal.ChronoField.MICRO_OF_DAY
+import java.time.temporal.ChronoField.{MICRO_OF_DAY, NANO_OF_DAY}
 import java.util.TimeZone
 import java.util.concurrent.TimeUnit.{MICROSECONDS, NANOSECONDS}
 import java.util.regex.Pattern
@@ -83,6 +83,12 @@ trait SparkDateTimeUtils {
     case ldt: LocalDateTime => localDateTimeToMicros(ldt)
   }
 
+  /**
+   * Converts the time to microseconds since midnight. In Spark time values have nanoseconds
+   * precision, so this conversion is lossy.
+   */
+  def nanosToMicros(nanos: Long): Long = Math.floorDiv(nanos, MICROS_PER_MILLIS)
+
   /**
    * Converts the timestamp to milliseconds since epoch. In Spark timestamp values have
    * microseconds precision, so this conversion is lossy.
@@ -101,6 +107,11 @@ trait SparkDateTimeUtils {
     Math.multiplyExact(millis, MICROS_PER_MILLIS)
   }
 
+  /**
+   * Converts microseconds since the midnight to nanoseconds.
+   */
+  def microsToNanos(micros: Long): Long = Math.multiplyExact(micros, NANOS_PER_MICROS)
+
   // See issue SPARK-35679
   // min second cause overflow in instant to micro
   private val MIN_SECONDS = Math.floorDiv(Long.MinValue, MICROS_PER_SECOND)
@@ -225,17 +236,15 @@ trait SparkDateTimeUtils {
   }
 
   /**
-   * Converts the local time to the number of microseconds within the day, from 0 to (24 * 60 * 60
-   * * 1000000) - 1.
+   * Converts the local time to the number of nanoseconds within the day, from 0 to (24 * 60 * 60
+   * * 1000 * 1000 * 1000) - 1.
    */
-  def localTimeToMicros(localTime: LocalTime): Long = localTime.getLong(MICRO_OF_DAY)
+  def localTimeToNanos(localTime: LocalTime): Long = localTime.getLong(NANO_OF_DAY)
 
   /**
-   * Converts the number of microseconds within the day to the local time.
+   * Converts the number of nanoseconds within the day to the local time.
    */
-  def microsToLocalTime(micros: Long): LocalTime = {
-    LocalTime.ofNanoOfDay(Math.multiplyExact(micros, NANOS_PER_MICROS))
-  }
+  def nanosToLocalTime(nanos: Long): LocalTime = LocalTime.ofNanoOfDay(nanos)
 
   /**
    * Converts a local date at the default JVM time zone to the number of days since 1970-01-01 in
@@ -716,7 +725,7 @@ trait SparkDateTimeUtils {
       }
       val nanoseconds = MICROSECONDS.toNanos(segments(6))
       val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
-      Some(localTimeToMicros(localTime))
+      Some(localTimeToNanos(localTime))
     } catch {
      case NonFatal(_) => None
    }
```
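
Since `DateTimeUtils` extends `SparkDateTimeUtils` (see the hunk headers below), the new helpers can be exercised directly. A minimal round-trip sketch, assuming a build that includes this commit:

```scala
import java.time.LocalTime
import org.apache.spark.sql.catalyst.util.DateTimeUtils._

val lt = LocalTime.parse("12:34:56.123456789")
val nanos = localTimeToNanos(lt)       // 45296123456789L, nanoseconds since midnight
assert(nanosToLocalTime(nanos) == lt)  // lossless round trip at nanosecond precision

// Narrowing to microseconds is lossy: the trailing three digits are dropped.
val micros = nanosToMicros(nanos)                // 45296123456L
assert(microsToNanos(micros) == 45296123456000L)
```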

sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala

Lines changed: 6 additions & 6 deletions

```diff
@@ -25,11 +25,11 @@ import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils._
 import org.apache.spark.unsafe.types.UTF8String
 
 sealed trait TimeFormatter extends Serializable {
-  def parse(s: String): Long // returns microseconds since midnight
+  def parse(s: String): Long // returns nanoseconds since midnight
 
   def format(localTime: LocalTime): String
-  // Converts microseconds since the midnight to time string
-  def format(micros: Long): String
+  // Converts nanoseconds since the midnight to time string
+  def format(nanos: Long): String
 
   def validatePatternString(): Unit
 }
@@ -47,15 +47,15 @@ class Iso8601TimeFormatter(pattern: String, locale: Locale, isParsing: Boolean)
 
   override def parse(s: String): Long = {
     val localTime = toLocalTime(formatter.parse(s))
-    localTimeToMicros(localTime)
+    localTimeToNanos(localTime)
   }
 
   override def format(localTime: LocalTime): String = {
     localTime.format(formatter)
   }
 
-  override def format(micros: Long): String = {
-    format(microsToLocalTime(micros))
+  override def format(nanos: Long): String = {
+    format(nanosToLocalTime(nanos))
   }
 
   override def validatePatternString(): Unit = {
```
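
A sketch of the revised parse/format contract, using the `Iso8601TimeFormatter` constructor visible in the hunk above (the pattern string and locale are illustrative assumptions, not taken from the commit):

```scala
import java.util.Locale

// Hypothetical usage; only the constructor and method signatures come from the diff above.
val fmt = new Iso8601TimeFormatter("HH:mm:ss.SSSSSSSSS", Locale.US, isParsing = true)
val nanos: Long = fmt.parse("01:02:03.123456789") // 3723123456789L nanoseconds since midnight
val text: String = fmt.format(nanos)              // renders the same nanosecond-precision time
```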

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala

Lines changed: 3 additions & 3 deletions

```diff
@@ -375,14 +375,14 @@ object CatalystTypeConverters {
 
   private object TimeConverter extends CatalystTypeConverter[LocalTime, LocalTime, Any] {
     override def toCatalystImpl(scalaValue: LocalTime): Long = {
-      DateTimeUtils.localTimeToMicros(scalaValue)
+      DateTimeUtils.localTimeToNanos(scalaValue)
     }
     override def toScala(catalystValue: Any): LocalTime = {
       if (catalystValue == null) null
-      else DateTimeUtils.microsToLocalTime(catalystValue.asInstanceOf[Long])
+      else DateTimeUtils.nanosToLocalTime(catalystValue.asInstanceOf[Long])
     }
     override def toScalaImpl(row: InternalRow, column: Int): LocalTime =
-      DateTimeUtils.microsToLocalTime(row.getLong(column))
+      DateTimeUtils.nanosToLocalTime(row.getLong(column))
   }
 
   private object TimestampConverter extends CatalystTypeConverter[Any, Timestamp, Any] {
```
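
End to end, the converter change means a `LocalTime` round-trips through Catalyst as a nanosecond count. A sketch mirroring the test suite changes further down:

```scala
import java.time.LocalTime
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.types.TimeType

val lt = LocalTime.parse("23:59:59.999999999")
// LocalTime -> Long (nanoseconds since midnight) on the way into Catalyst ...
val catalystValue = CatalystTypeConverters.convertToCatalyst(lt) // 86399999999999L
// ... and Long -> LocalTime on the way back out.
val roundTripped = CatalystTypeConverters.createToScalaConverter(TimeType())(catalystValue)
assert(roundTripped == lt)
```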

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -160,7 +160,7 @@ object DeserializerBuildHelper {
     StaticInvoke(
       DateTimeUtils.getClass,
       ObjectType(classOf[java.time.LocalTime]),
-      "microsToLocalTime",
+      "nanosToLocalTime",
       path :: Nil,
       returnNullable = false)
   }
```

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -103,7 +103,7 @@ object SerializerBuildHelper {
     StaticInvoke(
       DateTimeUtils.getClass,
       TimeType(),
-      "localTimeToMicros",
+      "localTimeToNanos",
       inputObject :: Nil,
       returnNullable = false)
   }
```

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala

Lines changed: 2 additions & 2 deletions

```diff
@@ -49,7 +49,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern
 import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL, TRUE_OR_FALSE_LITERAL}
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localTimeToMicros}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localTimeToNanos}
 import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
 import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
@@ -89,7 +89,7 @@ object Literal {
     case l: LocalDateTime => Literal(DateTimeUtils.localDateTimeToMicros(l), TimestampNTZType)
     case ld: LocalDate => Literal(ld.toEpochDay.toInt, DateType)
     case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType)
-    case lt: LocalTime => Literal(localTimeToMicros(lt), TimeType())
+    case lt: LocalTime => Literal(localTimeToNanos(lt), TimeType())
     case d: Duration => Literal(durationToMicros(d), DayTimeIntervalType())
     case p: Period => Literal(periodToMonths(p), YearMonthIntervalType())
     case a: Array[Byte] => Literal(a, BinaryType)
```
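
For literals, the practical effect is that a `LocalTime` value is now encoded as nanoseconds under `TimeType()`. A small illustrative check:

```scala
import java.time.LocalTime
import org.apache.spark.sql.catalyst.expressions.Literal

// One second past midnight becomes 10^9 nanoseconds rather than 10^6 microseconds.
val lit = Literal(LocalTime.of(0, 0, 1))
assert(lit.value == 1000000000L)
```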

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 9 additions & 9 deletions

```diff
@@ -109,7 +109,7 @@ object DateTimeUtils extends SparkDateTimeUtils {
    * Returns the hour value of a given TIME (TimeType) value.
    */
   def getHoursOfTime(micros: Long): Int = {
-    microsToLocalTime(micros).getHour
+    nanosToLocalTime(micros).getHour
   }
 
   /**
@@ -124,7 +124,7 @@ object DateTimeUtils extends SparkDateTimeUtils {
    * Returns the minute value of a given TIME (TimeType) value.
    */
   def getMinutesOfTime(micros: Long): Int = {
-    microsToLocalTime(micros).getMinute
+    nanosToLocalTime(micros).getMinute
   }
 
   /**
@@ -139,7 +139,7 @@ object DateTimeUtils extends SparkDateTimeUtils {
    * Returns the second value of a given TIME (TimeType) value.
    */
   def getSecondsOfTime(micros: Long): Int = {
-    microsToLocalTime(micros).getSecond
+    nanosToLocalTime(micros).getSecond
   }
   /**
    * Returns the seconds part and its fractional part with microseconds.
@@ -151,16 +151,16 @@
 
   /**
    * Returns the second value with fraction from a given TIME (TimeType) value.
-   * @param micros
-   *   The number of microseconds since the epoch.
+   * @param nanos
+   *   The number of nanoseconds since the epoch.
    * @param precision
    *   The time fractional seconds precision, which indicates the number of decimal digits
    *   maintained.
    */
-  def getSecondsOfTimeWithFraction(micros: Long, precision: Int): Decimal = {
-    val seconds = (micros / MICROS_PER_SECOND) % SECONDS_PER_MINUTE
+  def getSecondsOfTimeWithFraction(nanos: Long, precision: Int): Decimal = {
+    val seconds = (nanos / NANOS_PER_SECOND) % SECONDS_PER_MINUTE
     val scaleFactor = math.pow(10, precision).toLong
-    val scaledFraction = (micros % MICROS_PER_SECOND) * scaleFactor / MICROS_PER_SECOND
+    val scaledFraction = (nanos % NANOS_PER_SECOND) * scaleFactor / NANOS_PER_SECOND
     val fraction = scaledFraction.toDouble / scaleFactor
     Decimal(seconds + fraction, 8, 6)
   }
@@ -816,7 +816,7 @@ object DateTimeUtils extends SparkDateTimeUtils {
 
       val nanos = Math.floorMod(unscaledSecFrac, MICROS_PER_SECOND) * NANOS_PER_MICROS
       val lt = LocalTime.of(hours, minutes, fullSecs.toInt, nanos.toInt)
-      localTimeToMicros(lt)
+      localTimeToNanos(lt)
     } catch {
      case e @ (_: DateTimeException | _: ArithmeticException) =>
        throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRangeWithoutSuggestion(e)
```
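
The rescaled fraction math in `getSecondsOfTimeWithFraction` is easiest to follow with concrete numbers (a worked example, not code from the commit):

```scala
// 00:00:12.345678901 stored as nanoseconds since midnight, with precision = 3:
val nanos = 12345678901L
val seconds = (nanos / 1000000000L) % 60 // 12 (NANOS_PER_SECOND and SECONDS_PER_MINUTE inlined)
val scaleFactor = math.pow(10, 3).toLong // 1000
// Integer division truncates the fraction to the requested precision:
val scaledFraction = (nanos % 1000000000L) * scaleFactor / 1000000000L // 345
assert(scaledFraction == 345L)
val fraction = scaledFraction.toDouble / scaleFactor // 0.345
// The real method then returns Decimal(seconds + fraction, 8, 6), i.e. 12.345000.
```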

sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -292,7 +292,7 @@ object RandomDataGenerator {
       randomNumeric[LocalTime](
         rand,
         (rand: Random) => {
-          DateTimeUtils.microsToLocalTime(rand.between(0, 24 * 60 * 60 * 1000 * 1000L))
+          DateTimeUtils.nanosToLocalTime(rand.between(0, 24 * 60 * 60 * 1000 * 1000L))
         },
         specialTimes.map(LocalTime.parse)
       )
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala

Lines changed: 2 additions & 2 deletions

```diff
@@ -435,7 +435,7 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
       "23:59:59.999999").foreach { time =>
       val input = LocalTime.parse(time)
       val result = CatalystTypeConverters.convertToCatalyst(input)
-      val expected = DateTimeUtils.localTimeToMicros(input)
+      val expected = DateTimeUtils.localTimeToNanos(input)
       assert(result === expected)
     }
   }
@@ -449,7 +449,7 @@
       43200999999L,
       86399000000L,
       86399999999L).foreach { us =>
-      val localTime = DateTimeUtils.microsToLocalTime(us)
+      val localTime = DateTimeUtils.nanosToLocalTime(us)
       assert(CatalystTypeConverters.createToScalaConverter(TimeType())(us) === localTime)
     }
   }
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -377,7 +377,7 @@ class RowEncoderSuite extends CodegenInterpretedPlanTest {
     val encoder = ExpressionEncoder(schema).resolveAndBind()
     val localTime = java.time.LocalTime.parse("20:38:45.123456")
     val row = toRow(encoder, Row(localTime))
-    assert(row.getLong(0) === DateTimeUtils.localTimeToMicros(localTime))
+    assert(row.getLong(0) === DateTimeUtils.localTimeToNanos(localTime))
     val readback = fromRow(encoder, row)
     assert(readback.get(0).equals(localTime))
   }
```
