Skip to content

Commit 260029a

Browse files
chore: modify tool-versions to align with requirements
Co-authored-by: Thomas Chow <[email protected]>
1 parent 6d3b6ca commit 260029a

File tree

5 files changed

+29
-11
lines changed

5 files changed

+29
-11
lines changed

.tool-versions

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
java corretto-17.0.13.11.1
1+
java corretto-11.0.25.9.1
22
scala 2.12.20
33
asdf-plugin-manager 1.4.0
4-
sbt 1.10.5
4+
sbt 1.8.2
55
python 3.7.17

build.sbt

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ def cleanSparkMeta(): Unit = {
168168

169169
val sparkBaseSettings: Seq[Setting[_]] = Seq(
170170
assembly / test := {},
171+
assembly / assemblyShadeRules := Seq(
172+
ShadeRule.rename("org.typelevel.cats.**" -> "repackaged.org.typelevel.cats.@1").inAll,
173+
ShadeRule.rename("cats.**" -> "repackaged.cats.@1").inAll
174+
),
171175
assembly / artifact := {
172176
val art = (assembly / artifact).value
173177
art.withClassifier(Some("assembly"))
@@ -176,8 +180,9 @@ val sparkBaseSettings: Seq[Setting[_]] = Seq(
176180
cleanFiles ++= Seq(file(tmp_warehouse)),
177181
Test / testOptions += Tests.Setup(() => cleanSparkMeta()),
178182
// compatibility for m1 chip laptop
179-
libraryDependencies += "org.xerial.snappy" % "snappy-java" % "1.1.10.4" % Test
180-
) ++ addArtifact(assembly / artifact, assembly)
183+
libraryDependencies += "org.xerial.snappy" % "snappy-java" % "1.1.10.4" % Test,
184+
dependencyOverrides += "com.eed3si9n.jarjar" % "jarjar" % "1.14.0"
185+
) ++ addArtifact(assembly / artifact, assembly) ++ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.0")
181186

182187
lazy val spark = project
183188
.dependsOn(aggregator.%("compile->compile;test->test"), online)
@@ -211,13 +216,13 @@ lazy val cloud_gcp = project
211216
libraryDependencies += "com.google.cloud" % "google-cloud-bigquery" % "2.42.0",
212217
libraryDependencies += "com.google.cloud" % "google-cloud-bigtable" % "2.41.0",
213218
libraryDependencies += "com.google.cloud" % "google-cloud-pubsub" % "1.131.0",
214-
libraryDependencies += "com.google.cloud" % "google-cloud-dataproc" % "4.51.0",
215-
libraryDependencies += "com.google.cloud.bigdataoss" % "gcs-connector" % "3.0.3", // it's what's on the cluster
219+
libraryDependencies += "com.google.cloud" % "google-cloud-dataproc" % "4.52.0",
216220
libraryDependencies += "com.google.cloud.bigdataoss" % "gcs-connector" % "hadoop3-2.2.26",
217-
libraryDependencies += "com.google.cloud.bigdataoss" % "gcsio" % "3.0.3", // need it for https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
218-
libraryDependencies += "io.circe" %% "circe-yaml" % "1.15.0",
219-
libraryDependencies += "com.google.cloud.spark" %% s"spark-bigquery-with-dependencies" % "0.41.0",
220-
libraryDependencies += "com.google.cloud.spark.bigtable" %% "spark-bigtable" % "0.2.1",
221+
libraryDependencies += "com.google.cloud.bigdataoss" % "gcsio" % "2.2.26", // need it for https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
222+
libraryDependencies += "com.google.cloud.bigdataoss" % "util-hadoop" % "hadoop3-2.2.26", // need it for https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/util-hadoop/src/main/java/com/google/cloud/hadoop/util/HadoopConfigurationProperty.java
223+
libraryDependencies += "io.circe" %% "circe-yaml" % "1.15.0", // has an issue: https://github.com/typelevel/cats/issues/3628
224+
libraryDependencies += "com.google.cloud.spark" %% "spark-bigquery-with-dependencies" % "0.41.0",
225+
// libraryDependencies += "com.google.cloud.spark.bigtable" %% "spark-bigtable" % "0.2.1",
221226
libraryDependencies += "com.google.cloud.bigtable" % "bigtable-hbase-2.x" % "2.14.2",
222227
libraryDependencies ++= circe,
223228
libraryDependencies ++= avro,

cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/test/BigQueryCatalogTest.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ import ai.chronon.spark.SparkSessionBuilder
66
import ai.chronon.spark.TableUtils
77
import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS
88
import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem
9+
import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration
910
import org.apache.spark.sql.SparkSession
1011
import org.junit.Assert.assertEquals
1112
import org.junit.Assert.assertTrue
1213
import org.scalatest.funsuite.AnyFunSuite
1314
import org.scalatestplus.mockito.MockitoSugar
15+
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem
16+
import com.google.cloud.hadoop.fs.gcs.HadoopConfigurationProperty
1417

1518
class BigQueryCatalogTest extends AnyFunSuite with MockitoSugar {
1619

@@ -34,6 +37,14 @@ class BigQueryCatalogTest extends AnyFunSuite with MockitoSugar {
3437
assertEquals("thrift://localhost:9083", spark.sqlContext.getConf("hive.metastore.uris"))
3538
}
3639

40+
test("google runtime classes are available") {
41+
assertTrue(GoogleHadoopFileSystemConfiguration.BLOCK_SIZE.isInstanceOf[HadoopConfigurationProperty[Long]])
42+
assertCompiles("classOf[GoogleHadoopFileSystem]")
43+
assertCompiles("classOf[GoogleHadoopFS]")
44+
assertCompiles("classOf[GoogleCloudStorageFileSystem]")
45+
46+
}
47+
3748
test("verify dynamic classloading of GCP providers") {
3849
assertTrue(tableUtils.tableReadFormat("data.sample_native") match {
3950
case BQuery(_) => true

cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/test/DataprocSubmitterTest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class DataprocSubmitterTest extends AnyFunSuite with MockitoSugar {
4949
BigQueryUtilScala.validateScalaVersionCompatibility()
5050
}
5151

52-
ignore("Used to iterate locally. Do not enable this in CI/CD!") {
52+
test("Used to iterate locally. Do not enable this in CI/CD!") {
5353

5454
val submitter = DataprocSubmitter()
5555
val submittedJobId =

spark/src/main/scala/ai/chronon/spark/SparkSessionBuilder.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ object SparkSessionBuilder {
109109
.config("spark.driver.bindAddress", "127.0.0.1")
110110
.config("spark.ui.enabled", "false")
111111
.config("spark.sql.catalogImplementation", "hive")
112+
.config("spark.driver.userClassPathFirst", true)
113+
.config("spark.executor.userClassPathFirst", true)
112114
} else {
113115
// hive jars need to be available on classpath - not needed for local testing
114116
baseBuilder

0 commit comments

Comments (0)