Skip to content

Commit 74b6cc4

Browse files
fix: support non-string types for iceberg partition listing
Co-authored-by: Thomas Chow <[email protected]>
1 parent 3d2e77d commit 74b6cc4

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

spark/src/main/scala/ai/chronon/spark/format/Iceberg.scala

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package ai.chronon.spark.format
22

33
import org.apache.spark.sql.SparkSession
4+
import org.apache.spark.sql.functions.{col, date_format}
45
import org.apache.spark.sql.types.StructType
56

67
case object Iceberg extends Format {
@@ -29,12 +30,12 @@ case object Iceberg extends Format {
2930
.load(s"$tableName.partitions")
3031

3132
val index = partitionsDf.schema.fieldIndex("partition")
32-
33+
val partitionFmt = sparkSession.conf.get("spark.chronon.partition.format", "yyyyMMdd")
3334
if (partitionsDf.schema(index).dataType.asInstanceOf[StructType].fieldNames.contains("hr")) {
3435
// Hour filter is currently buggy in iceberg. https://github.com/apache/iceberg/issues/4718
3536
// so we collect and then filter.
3637
partitionsDf
37-
.select("partition.ds", "partition.hr")
38+
.select(date_format(col("partition.ds"), partitionFmt), col("partition.hr"))
3839
.collect()
3940
.filter(_.get(1) == null)
4041
.map(_.getString(0))
@@ -43,7 +44,7 @@ case object Iceberg extends Format {
4344
} else {
4445

4546
partitionsDf
46-
.select("partition.ds")
47+
.select(date_format(col("partition.ds"), partitionFmt))
4748
.collect()
4849
.map(_.getString(0))
4950
.toSeq

0 commit comments

Comments (0)