Skip to content

Commit 9d32bfd

Browse files
authored
Bazel migration for spark module (#196)
## Summary Migrated the spark module to Bazel. Known issue with tests: JUnit tests and Scala tests using FunSuite are not discovered by Bazel's scala_test_suite. Thanks to @nikhil-zlai for working on the script that converts our tests to the ScalaTest FlatSpec API, which appears to work with Bazel. Will test again after his changes and create a separate PR if additional changes are needed. Also, it's good to stay consistent with a single unit-test API going forward unless we really need others. ## Checklist - [ ] Added Unit Tests - [x] Covered by existing CI - [ ] Integration tested - [ ] Documentation update <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit ## Release Notes - **Dependencies** - Updated logging framework dependency to Log4j 2.x across various libraries. - Updated ScalaTest dependencies in API testing. - Migrated library dependencies from Maven to Scala artifacts. - Updated Jackson, JSON4s, and other library versions. - Added new dependencies for Guava, Netty, Delta Spark, and Kafka. - **Build Configuration** - Introduced new Scala library and test configurations. - Added JVM binary assembly configuration. - Refined Bazel build rule imports and loading. - **Dependency Management** - Removed several unused dependencies. - Simplified dependency references. - Updated runtime dependency paths. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent be6ae9c commit 9d32bfd

File tree

9 files changed

+241
-94
lines changed

9 files changed

+241
-94
lines changed

aggregator/BUILD.bazel

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ scala_library(
1212
maven_artifact("org.apache.datasketches:datasketches-java"),
1313
maven_artifact("org.apache.commons:commons-lang3"),
1414
maven_artifact("org.slf4j:slf4j-api"),
15-
maven_artifact("org.slf4j:slf4j-log4j12"),
15+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
1616
scala_artifact("org.scala-lang.modules:scala-collection-compat"),
1717
],
1818
)
@@ -28,7 +28,7 @@ scala_library(
2828
maven_artifact("junit:junit"),
2929
maven_artifact("com.novocode:junit-interface"),
3030
maven_artifact("org.slf4j:slf4j-api"),
31-
maven_artifact("org.slf4j:slf4j-log4j12"),
31+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
3232
maven_artifact("com.google.code.gson:gson"),
3333
maven_artifact("org.apache.datasketches:datasketches-memory"),
3434
maven_artifact("org.apache.datasketches:datasketches-java"),
@@ -50,7 +50,7 @@ scala_test_suite(
5050
maven_artifact("junit:junit"),
5151
maven_artifact("com.novocode:junit-interface"),
5252
maven_artifact("org.slf4j:slf4j-api"),
53-
maven_artifact("org.slf4j:slf4j-log4j12"),
53+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
5454
maven_artifact("com.google.code.gson:gson"),
5555
maven_artifact("org.apache.datasketches:datasketches-memory"),
5656
maven_artifact("org.apache.datasketches:datasketches-java"),

api/BUILD.bazel

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ scala_library(
1010
maven_artifact("com.fasterxml.jackson.core:jackson-core"),
1111
maven_artifact("com.fasterxml.jackson.core:jackson-databind"),
1212
maven_artifact("org.slf4j:slf4j-api"),
13-
maven_artifact("org.slf4j:slf4j-log4j12"),
13+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
1414
maven_artifact("org.apache.commons:commons-lang3"),
1515
maven_artifact("com.google.code.gson:gson"),
1616
scala_artifact("org.scala-lang.modules:scala-collection-compat"),
@@ -29,12 +29,15 @@ scala_test_suite(
2929
maven_artifact("com.fasterxml.jackson.core:jackson-core"),
3030
maven_artifact("com.fasterxml.jackson.core:jackson-databind"),
3131
maven_artifact("org.slf4j:slf4j-api"),
32-
maven_artifact("org.slf4j:slf4j-log4j12"),
32+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
3333
maven_artifact("junit:junit"),
3434
maven_artifact("com.novocode:junit-interface"),
3535
maven_artifact("org.mockito:mockito-core"),
3636
scala_artifact("org.scala-lang.modules:scala-parser-combinators"),
3737
scala_artifact("org.scala-lang.modules:scala-collection-compat"),
38+
scala_artifact("org.scalatest:scalatest"),
39+
scala_artifact("org.scalatest:scalatest-flatspec"),
40+
scala_artifact("org.scalatest:scalatest-funsuite"),
3841
],
3942
)
4043

@@ -50,7 +53,7 @@ java_library(
5053
deps = [
5154
maven_artifact("javax.annotation:javax.annotation.api"),
5255
maven_artifact("org.slf4j:slf4j-api"),
53-
maven_artifact("org.slf4j:slf4j-log4j12"),
56+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
5457
maven_artifact("org.apache.commons:commons-lang3"),
5558
maven_artifact("com.google.code.gson:gson"),
5659
],

online/BUILD.bazel

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ scala_library(
1313
scala_artifact("org.json4s:json4s-ast"),
1414
scala_artifact("org.scala-lang.modules:scala-collection-compat"),
1515
maven_artifact("com.datadoghq:java-dogstatsd-client"),
16-
maven_artifact("org.rogach:scallop_2_12"),
16+
scala_artifact("org.rogach:scallop"),
1717
maven_artifact("net.jodah:typetools"),
1818
maven_artifact("com.github.ben-manes.caffeine:caffeine"),
1919
maven_artifact("com.fasterxml.jackson.core:jackson-core"),
2020
maven_artifact("com.fasterxml.jackson.core:jackson-databind"),
21-
maven_artifact("com.softwaremill.sttp.client3:core_2.12"),
22-
maven_artifact("com.softwaremill.sttp.model:core_2.12"),
23-
maven_artifact("com.softwaremill.sttp.shared:core_2.12"),
21+
scala_artifact("com.softwaremill.sttp.client3:core"),
22+
scala_artifact("com.softwaremill.sttp.model:core"),
23+
scala_artifact("com.softwaremill.sttp.shared:core"),
2424
maven_artifact("org.slf4j:slf4j-api"),
25-
maven_artifact("org.slf4j:slf4j-log4j12"),
25+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
2626
maven_artifact("com.google.code.gson:gson"),
2727
],
2828
)
@@ -40,7 +40,7 @@ scala_library(
4040
# Libraries
4141
maven_artifact("com.github.ben-manes.caffeine:caffeine"),
4242
maven_artifact("org.slf4j:slf4j-api"),
43-
maven_artifact("org.slf4j:slf4j-log4j12"),
43+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
4444
maven_artifact("com.google.code.gson:gson"),
4545
# Testing
4646
scala_artifact("org.scalatest:scalatest-matchers-core"),
@@ -74,7 +74,7 @@ scala_test_suite(
7474
maven_artifact("org.mockito:mockito-core"),
7575
scala_artifact("org.scala-lang.modules:scala-collection-compat"),
7676
maven_artifact("org.slf4j:slf4j-api"),
77-
maven_artifact("org.slf4j:slf4j-log4j12"),
77+
maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
7878
maven_artifact("com.google.code.gson:gson"),
7979
maven_artifact("com.github.ben-manes.caffeine:caffeine"),
8080
maven_artifact("junit:junit"),

spark/BUILD.bazel

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Main Spark module library: compiles every Scala source under src/main and
# ships the log4j2 configuration as a classpath resource.
scala_library(
    name = "lib",
    srcs = glob(["src/main/**/*.scala"]),
    resources = ["src/main/resources/log4j2.properties"],
    deps = [
        # In-repo modules
        "//aggregator:lib",
        "//api:lib",
        "//api:thrift",
        "//online:lib",
        "//tools/build_rules/spark:spark-exec",

        # Jackson
        maven_artifact("com.fasterxml.jackson.core:jackson-core"),
        maven_artifact("com.fasterxml.jackson.core:jackson-databind"),
        scala_artifact("com.fasterxml.jackson.module:jackson-module-scala"),

        # General-purpose utilities
        maven_artifact("com.google.guava:guava"),
        maven_artifact("commons-io:commons-io"),
        maven_artifact("commons-lang:commons-lang"),

        # Kafka
        maven_artifact("org.apache.kafka:kafka-clients"),

        # JSON4s and Scala compatibility modules
        scala_artifact("org.json4s:json4s-core"),
        scala_artifact("org.json4s:json4s-jackson"),
        scala_artifact("org.json4s:json4s-ast"),
        scala_artifact("org.scala-lang.modules:scala-collection-compat"),
        scala_artifact("org.scala-lang.modules:scala-parser-combinators"),

        # Logging
        # NOTE(review): log4j-slf4j-impl is the Log4j binding for SLF4J 1.7.x,
        # while maven_repository.bzl pins slf4j-api 2.0.12; SLF4J 2.x normally
        # pairs with log4j-slf4j2-impl. Confirm which version actually resolves.
        maven_artifact("org.slf4j:slf4j-api"),
        maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),

        # Miscellaneous
        maven_artifact("com.google.code.gson:gson"),
        maven_artifact("jakarta.servlet:jakarta.servlet-api"),
        maven_artifact("org.yaml:snakeyaml"),
        maven_artifact("org.apache.datasketches:datasketches-memory"),
        maven_artifact("org.apache.datasketches:datasketches-java"),
        scala_artifact("org.rogach:scallop"),

        # Netty (the aggregate artifact plus the individual modules)
        maven_artifact("io.netty:netty-all"),
        maven_artifact("io.netty:netty-transport"),
        maven_artifact("io.netty:netty-handler"),
        maven_artifact("io.netty:netty-buffer"),
        maven_artifact("io.netty:netty-codec-http"),
        maven_artifact("io.netty:netty-common"),
        maven_artifact("io.netty:netty-codec"),

        # Delta Lake
        scala_artifact("io.delta:delta-spark"),
    ],
)
43+
44+
# Library built from the Scala sources under src/test. It is publicly visible
# so that other packages can depend on the Spark test sources.
scala_library(
    name = "test-lib",
    srcs = glob(["src/test/**/*.scala"]),
    visibility = ["//visibility:public"],
    deps = [
        ":lib",
        "//aggregator:lib",
        "//aggregator:test-lib",
        "//api:lib",
        "//api:thrift",
        "//online:lib",
        "//tools/build_rules/spark:spark-exec",
        # Library
        maven_artifact("com.google.code.gson:gson"),
        maven_artifact("com.google.guava:guava"),
        scala_artifact("org.rogach:scallop"),
        # Group IDs are `commons-io` / `commons-lang` (hyphenated), matching
        # the coordinates used by `:lib` and declared in maven_repository.bzl.
        maven_artifact("commons-io:commons-io"),
        maven_artifact("commons-lang:commons-lang"),
        scala_artifact("org.scala-lang.modules:scala-java8-compat"),
        scala_artifact("com.fasterxml.jackson.module:jackson-module-scala"),
        maven_artifact("org.slf4j:slf4j-api"),
        maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
        scala_artifact("org.json4s:json4s-core"),
        scala_artifact("org.json4s:json4s-jackson"),
        scala_artifact("org.json4s:json4s-ast"),
        maven_artifact("org.yaml:snakeyaml"),
        # Testing
        scala_artifact("org.scalatest:scalatest-matchers-core"),
        scala_artifact("org.scalatest:scalatest-core"),
        scala_artifact("org.scalatest:scalatest"),
        scala_artifact("org.scalatest:scalatest-flatspec"),
        scala_artifact("org.scalatest:scalatest-funsuite"),
        scala_artifact("org.scalatest:scalatest-shouldmatchers"),
        scala_artifact("org.scalactic:scalactic"),
        scala_artifact("org.scalatestplus:mockito-3-4"),
        maven_artifact("org.scalatest:scalatest-compatible"),
        maven_artifact("junit:junit"),
        maven_artifact("com.novocode:junit-interface"),
        maven_artifact("org.mockito:mockito-core"),
    ],
)
85+
86+
# Test suite over the Scala sources under src/test.
# NOTE(review): the same glob is also compiled into `:test-lib`, which is
# listed in deps below; confirm that compiling these sources twice is intended.
scala_test_suite(
    name = "test",
    srcs = glob(["src/test/**/*.scala"]),
    visibility = ["//visibility:public"],
    # Open JDK-internal packages to unnamed modules for reflective access.
    # Presumably needed to run Spark on newer JDKs; confirm against the
    # toolchain's Java version.
    jvm_flags = [
        "--add-opens=java.base/java.lang=ALL-UNNAMED",
        "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED",
        "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
        "--add-opens=java.base/java.io=ALL-UNNAMED",
        "--add-opens=java.base/java.net=ALL-UNNAMED",
        "--add-opens=java.base/java.nio=ALL-UNNAMED",
        "--add-opens=java.base/java.util=ALL-UNNAMED",
        "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
        "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED",
        "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED",
        "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED",
        "--add-opens=java.base/sun.security.action=ALL-UNNAMED",
        "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED",
    ],
    deps = [
        ":lib",
        ":test-lib",
        "//aggregator:lib",
        "//aggregator:test-lib",
        "//api:lib",
        "//api:thrift",
        "//online:lib",
        "//tools/build_rules/spark:spark-exec",
        # Library
        maven_artifact("com.google.code.gson:gson"),
        maven_artifact("com.google.guava:guava"),
        scala_artifact("org.rogach:scallop"),
        # Group IDs are `commons-io` / `commons-lang` (hyphenated), matching
        # the coordinates used by `:lib` and declared in maven_repository.bzl.
        maven_artifact("commons-io:commons-io"),
        maven_artifact("commons-lang:commons-lang"),
        scala_artifact("org.scala-lang.modules:scala-java8-compat"),
        scala_artifact("com.fasterxml.jackson.module:jackson-module-scala"),
        maven_artifact("org.slf4j:slf4j-api"),
        maven_artifact("org.apache.logging.log4j:log4j-slf4j-impl"),
        scala_artifact("org.json4s:json4s-core"),
        scala_artifact("org.json4s:json4s-jackson"),
        scala_artifact("org.json4s:json4s-ast"),
        maven_artifact("org.yaml:snakeyaml"),
        # Testing
        scala_artifact("org.scalatest:scalatest-matchers-core"),
        scala_artifact("org.scalatest:scalatest-core"),
        scala_artifact("org.scalatest:scalatest"),
        scala_artifact("org.scalatest:scalatest-flatspec"),
        scala_artifact("org.scalatest:scalatest-funsuite"),
        scala_artifact("org.scalatest:scalatest-shouldmatchers"),
        scala_artifact("org.scalactic:scalactic"),
        scala_artifact("org.scalatestplus:mockito-3-4"),
        maven_artifact("org.scalatest:scalatest-compatible"),
        maven_artifact("junit:junit"),
        maven_artifact("com.novocode:junit-interface"),
        maven_artifact("org.mockito:mockito-core"),
    ],
)
143+
144+
# Runnable binary for the Spark driver, assembled from `:lib` at runtime.
# `deploy_env` is forwarded by the jvm_binary macro to java_binary.
# Presumably it keeps the Spark-provided jars out of the deploy jar; confirm
# against //tools/build_rules/spark:spark.
jvm_binary(
    name = "assembly",
    main_class = "ai.chronon.spark.Driver",
    runtime_deps = [":lib"],
    deploy_env = ["//tools/build_rules/spark:spark"],
)

tools/build_rules/dependencies/maven_repository.bzl

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,45 +23,33 @@ maven_repository = repository(
2323
"org.scalactic:scalactic_2.12:3.2.15",
2424

2525
# Add other dependencies
26-
"org.slf4j:slf4j-api:1.7.30",
27-
"org.slf4j:slf4j-log4j12:1.7.30",
26+
"org.slf4j:slf4j-api:2.0.12",
27+
"org.apache.logging.log4j:log4j-slf4j-impl:2.20.0",
2828
"org.apache.datasketches:datasketches-memory:3.0.2",
2929
"org.apache.datasketches:datasketches-java:6.1.1",
30-
"com.fasterxml.jackson.core:jackson-core:2.12.5",
31-
"com.fasterxml.jackson.core:jackson-databind:2.12.5",
30+
"com.fasterxml.jackson.core:jackson-core:2.15.2",
31+
"com.fasterxml.jackson.core:jackson-databind:2.15.2",
32+
"com.fasterxml.jackson.module:jackson-module-scala_2.12:2.15.2",
3233
"com.google.code.gson:gson:2.10.1",
3334
"javax.annotation:javax.annotation-api:1.3.2",
3435
"com.datadoghq:java-dogstatsd-client:4.4.1",
3536
"org.rogach:scallop_2.12:5.1.0",
3637
"net.jodah:typetools:0.6.3",
3738
"com.github.ben-manes.caffeine:caffeine:3.1.8",
3839
"com.softwaremill.sttp.client3:core_2.12:3.9.7",
39-
"org.json4s:json4s-jackson_2.12:3.6.12",
40-
"org.json4s:json4s-core_2.12:3.6.12",
41-
"org.json4s:json4s-ast_2.12:3.6.12",
40+
"org.json4s:json4s-jackson_2.12:3.7.0-M11",
41+
"org.json4s:json4s-core_2.12:3.7.0-M11",
42+
"org.json4s:json4s-ast_2.12:3.7.0-M11",
43+
"jakarta.servlet:jakarta.servlet-api:4.0.3",
44+
"com.google.guava:guava:33.3.1-jre",
45+
"org.yaml:snakeyaml:2.3",
46+
"commons-io:commons-io:2.9.0",
47+
"commons-lang:commons-lang:2.6",
48+
"io.netty:netty-all:4.1.99.Final",
49+
"io.delta:delta-spark_2.12:3.2.0",
4250

43-
# Flink
44-
versioned_artifacts("1.17.0", [
45-
"org.apache.flink:flink-clients",
46-
"org.apache.flink:flink-connector-files",
47-
"org.apache.flink:flink-connector-hive_2.12",
48-
"org.apache.flink:flink-csv",
49-
"org.apache.flink:flink-json",
50-
"org.apache.flink:flink-metrics-core",
51-
"org.apache.flink:flink-metrics-prometheus:jar",
52-
"org.apache.flink:flink-orc",
53-
"org.apache.flink:flink-parquet",
54-
"org.apache.flink:flink-protobuf",
55-
"org.apache.flink:flink-scala_2.12",
56-
"org.apache.flink:flink-sql-gateway-api",
57-
"org.apache.flink:flink-streaming-java",
58-
"org.apache.flink:flink-streaming-scala_2.12",
59-
"org.apache.flink:flink-table-api-java",
60-
"org.apache.flink:flink-table-planner_2.12",
61-
"org.apache.flink:flink-test-utils",
62-
"org.apache.flink:flink-streaming-java:jar:tests",
63-
"org.apache.flink:flink-metrics-dropwizard",
64-
]),
51+
# Kafka
52+
"org.apache.kafka:kafka-clients:3.9.0",
6553
],
6654
overrides = {
6755
},

tools/build_rules/dependencies/spark_repository.bzl

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,9 @@ spark_repository = repository(
1010
"org.apache.spark:spark-hive_2.12:3.5.1",
1111
"org.apache.spark:spark-streaming_2.12:3.5.1",
1212

13-
# Other dependencies
14-
"org.apache.curator:apache-curator:2.12.0",
15-
"com.esotericsoftware:kryo:5.1.1",
16-
"com.yahoo.datasketches:sketches-core:0.13.4",
17-
"com.yahoo.datasketches:memory:0.12.2",
18-
"com.yahoo.datasketches:sketches-hive:0.13.0",
19-
"org.apache.datasketches:datasketches-java:2.0.0",
20-
"org.apache.datasketches:datasketches-memory:1.3.0",
21-
22-
# Kafka dependencies - only Scala 2.12
23-
"org.apache.kafka:kafka_2.12:2.6.3",
24-
25-
# Avro dependencies
26-
"org.apache.avro:avro:1.8.2",
27-
"org.apache.avro:avro-mapred:1.8.2",
13+
# Hive dependencies
2814
"org.apache.hive:hive-metastore:2.3.9",
29-
"org.apache.hive:hive-exec:3.1.2",
30-
31-
# Monitoring
32-
"io.prometheus.jmx:jmx_prometheus_javaagent:0.20.0",
15+
"org.apache.hive:hive-exec:2.3.9",
3316
],
3417
excluded_artifacts = [
3518
"org.pentaho:pentaho-aggdesigner-algorithm",

tools/build_rules/jvm_binary.bzl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
load("@rules_java//java:defs.bzl", "java_binary", "java_library")
2+
load("@io_bazel_rules_scala//scala:scala.bzl", "scala_binary", "scala_library")
3+
4+
def jvm_binary(
5+
name,
6+
srcs = [],
7+
deps = [],
8+
runtime_deps = [],
9+
services = {},
10+
tags = None,
11+
main_class = None,
12+
visibility = None,
13+
create_executable = True,
14+
testonly = None,
15+
# All other flags are passed to java_binary
16+
**kwargs):
17+
has_scala_srcs = False
18+
has_java_srcs = False
19+
for src in srcs:
20+
if src.endswith(".scala"):
21+
has_scala_srcs = True
22+
if src.endswith(".java"):
23+
has_java_srcs = True
24+
if has_scala_srcs and has_java_srcs:
25+
fail("Cannot have scala and java sources in same jvm_binary")
26+
27+
lib_name = name + "_lib"
28+
if has_scala_srcs:
29+
scala_library(
30+
name = lib_name,
31+
srcs = srcs,
32+
deps = deps,
33+
runtime_deps = runtime_deps,
34+
tags = tags,
35+
)
36+
else:
37+
java_library(
38+
name = lib_name,
39+
srcs = srcs,
40+
deps = deps,
41+
runtime_deps = runtime_deps,
42+
tags = tags,
43+
testonly = testonly,
44+
)
45+
46+
java_binary(
47+
name = name,
48+
runtime_deps = [lib_name],
49+
tags = tags,
50+
main_class = main_class,
51+
create_executable = create_executable,
52+
testonly = testonly,
53+
**kwargs
54+
)

0 commit comments

Comments
 (0)