Skip to content

Commit f1660b8

Browse files
chore: split out expensive spark tests to parallelize (#382)
## Summary ## Checklist - [ ] Added Unit Tests - [ ] Covered by existing CI - [ ] Integration tested - [ ] Documentation update <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced dedicated testing workflows covering multiple system components to enhance overall reliability. - Added new test suites for various components to enhance testing granularity. - **Refactor** - Streamlined code organization with improved package structures and consolidated imports across test modules. - **Chores** - Upgraded automated testing configurations with optimized resource settings for improved performance and stability. <!-- end of auto-generated comment: release notes by coderabbit.ai --> <!-- av pr metadata This information is embedded by the av CLI when creating PRs to track the status of stacks when using Aviator. Please do not delete or edit this section of the PR. ``` {"parent":"main","parentHead":"","trunk":"main"} ``` --> --------- Co-authored-by: Thomas Chow <[email protected]>
1 parent 0c7d4ac commit f1660b8

18 files changed

+377
-265
lines changed

.github/workflows/test_scala_spark.yaml

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,133 @@ jobs:
4444
--google_credentials=bazel-cache-key.json \
4545
--test_env=JAVA_OPTS="-Xmx8G -Xms2G" \
4646
//spark:tests
47+
48+
fetcher_tests:
49+
runs-on: ubuntu-8_cores-32_gb
50+
container:
51+
image: ghcr.io/${{ github.repository }}-ci:latest
52+
credentials:
53+
username: ${{ github.actor }}
54+
password: ${{ secrets.GITHUB_TOKEN }}
55+
defaults:
56+
run:
57+
working-directory: ${{ github.workspace }}
58+
59+
steps:
60+
- uses: actions/checkout@v4
61+
62+
- name: Setup Bazel cache credentials
63+
run: |
64+
echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
65+
66+
- name: Run Fetcher tests
67+
run: |
68+
bazel test \
69+
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
70+
--google_credentials=bazel-cache-key.json \
71+
--test_env=JAVA_OPTS="-Xmx16G -Xms8G" \
72+
//spark:fetcher_test
73+
74+
join_tests:
75+
runs-on: ubuntu-8_cores-32_gb
76+
container:
77+
image: ghcr.io/${{ github.repository }}-ci:latest
78+
credentials:
79+
username: ${{ github.actor }}
80+
password: ${{ secrets.GITHUB_TOKEN }}
81+
defaults:
82+
run:
83+
working-directory: ${{ github.workspace }}
84+
85+
steps:
86+
- uses: actions/checkout@v4
87+
88+
- name: Setup Bazel cache credentials
89+
run: |
90+
echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
91+
92+
- name: Run Join tests
93+
run: |
94+
bazel test \
95+
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
96+
--google_credentials=bazel-cache-key.json \
97+
--test_env=JAVA_OPTS="-Xmx16G -Xms8G" \
98+
//spark:join_test
99+
100+
groupby_tests:
101+
runs-on: ubuntu-8_cores-32_gb
102+
container:
103+
image: ghcr.io/${{ github.repository }}-ci:latest
104+
credentials:
105+
username: ${{ github.actor }}
106+
password: ${{ secrets.GITHUB_TOKEN }}
107+
defaults:
108+
run:
109+
working-directory: ${{ github.workspace }}
110+
111+
steps:
112+
- uses: actions/checkout@v4
113+
114+
- name: Setup Bazel cache credentials
115+
run: |
116+
echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
117+
118+
- name: Run GroupBy tests
119+
run: |
120+
bazel test \
121+
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
122+
--google_credentials=bazel-cache-key.json \
123+
--test_env=JAVA_OPTS="-Xmx16G -Xms8G" \
124+
//spark:groupby_test
125+
126+
analyzer_tests:
127+
runs-on: ubuntu-8_cores-32_gb
128+
container:
129+
image: ghcr.io/${{ github.repository }}-ci:latest
130+
credentials:
131+
username: ${{ github.actor }}
132+
password: ${{ secrets.GITHUB_TOKEN }}
133+
defaults:
134+
run:
135+
working-directory: ${{ github.workspace }}
136+
137+
steps:
138+
- uses: actions/checkout@v4
139+
140+
- name: Setup Bazel cache credentials
141+
run: |
142+
echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
143+
144+
- name: Run Analyzer tests
145+
run: |
146+
bazel test \
147+
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
148+
--google_credentials=bazel-cache-key.json \
149+
--test_env=JAVA_OPTS="-Xmx16G -Xms8G" \
150+
//spark:analyzer_test
151+
152+
streaming_tests:
153+
runs-on: ubuntu-8_cores-32_gb
154+
container:
155+
image: ghcr.io/${{ github.repository }}-ci:latest
156+
credentials:
157+
username: ${{ github.actor }}
158+
password: ${{ secrets.GITHUB_TOKEN }}
159+
defaults:
160+
run:
161+
working-directory: ${{ github.workspace }}
162+
163+
steps:
164+
- uses: actions/checkout@v4
165+
166+
- name: Setup Bazel cache credentials
167+
run: |
168+
echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
169+
170+
- name: Run Streaming tests
171+
run: |
172+
bazel test \
173+
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
174+
--google_credentials=bazel-cache-key.json \
175+
--test_env=JAVA_OPTS="-Xmx16G -Xms8G" \
176+
//spark:streaming_test

spark/BUILD.bazel

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,67 @@ scala_library(
9090
name = "test_lib",
9191
srcs = glob(["src/test/**/*.scala"]),
9292
format = True,
93-
visibility = ["//visibility:public"],
9493
deps = test_deps,
9594
)
9695

9796
scala_test_suite(
9897
name = "tests",
99-
srcs = glob(["src/test/**/*.scala"]),
98+
tags = ["large"],
99+
srcs = glob(["src/test/scala/ai/chronon/spark/test/*.scala",
100+
"src/test/scala/ai/chronon/spark/test/udafs/*.scala",
101+
"src/test/scala/ai/chronon/spark/test/stats/drift/*.scala",
102+
"src/test/scala/ai/chronon/spark/test/bootstrap/*.scala"]),
103+
data = glob(["spark/src/test/resources/**/*"]),
104+
# defined in prelude_bazel file
105+
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,
106+
visibility = ["//visibility:public"],
107+
deps = test_deps + [":test_lib"],
108+
)
109+
110+
scala_test_suite(
111+
name = "fetcher_test",
112+
srcs = glob(["src/test/scala/ai/chronon/spark/test/fetcher/*.scala"]),
113+
resources = ["//spark/src/test/resources:test-resources"],
114+
# defined in prelude_bazel file
115+
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,
116+
visibility = ["//visibility:public"],
117+
deps = test_deps + [":test_lib"],
118+
)
119+
120+
scala_test_suite(
121+
name = "groupby_test",
122+
srcs = glob(["src/test/scala/ai/chronon/spark/test/groupby/*.scala"]),
123+
data = glob(["spark/src/test/resources/**/*"]),
124+
# defined in prelude_bazel file
125+
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,
126+
visibility = ["//visibility:public"],
127+
deps = test_deps + [":test_lib"],
128+
)
129+
130+
scala_test_suite(
131+
name = "join_test",
132+
srcs = glob(["src/test/scala/ai/chronon/spark/test/join/*.scala"]),
133+
tags = ["large"],
134+
data = glob(["spark/src/test/resources/**/*"]),
135+
# defined in prelude_bazel file
136+
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,
137+
visibility = ["//visibility:public"],
138+
deps = test_deps + [":test_lib"],
139+
)
140+
141+
scala_test_suite(
142+
name = "analyzer_test",
143+
srcs = glob(["src/test/scala/ai/chronon/spark/test/analyzer/*.scala"]),
144+
data = glob(["spark/src/test/resources/**/*"]),
145+
# defined in prelude_bazel file
146+
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,
147+
visibility = ["//visibility:public"],
148+
deps = test_deps + [":test_lib"],
149+
)
150+
151+
scala_test_suite(
152+
name = "streaming_test",
153+
srcs = glob(["src/test/scala/ai/chronon/spark/test/streaming/*.scala"]),
100154
data = glob(["spark/src/test/resources/**/*"]),
101155
# defined in prelude_bazel file
102156
jvm_flags = _JVM_FLAGS_FOR_ACCESSING_BASE_JAVA_CLASSES,

spark/src/test/scala/ai/chronon/spark/test/AnalyzerTest.scala renamed to spark/src/test/scala/ai/chronon/spark/test/analyzer/AnalyzerTest.scala

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,21 @@
1414
* limitations under the License.
1515
*/
1616

17-
package ai.chronon.spark.test
17+
package ai.chronon.spark.test.analyzer
1818

1919
import ai.chronon.aggregator.test.Column
2020
import ai.chronon.api
2121
import ai.chronon.api._
22-
import ai.chronon.spark.Analyzer
2322
import ai.chronon.spark.Extensions._
24-
import ai.chronon.spark.Join
25-
import ai.chronon.spark.SparkSessionBuilder
26-
import ai.chronon.spark.TableUtils
23+
import ai.chronon.spark.{Analyzer, Join, SparkSessionBuilder, TableUtils}
24+
import ai.chronon.spark.test.DataFrameGen
2725
import org.apache.spark.sql.SparkSession
28-
import org.apache.spark.sql.functions.col
29-
import org.apache.spark.sql.functions.lit
26+
import org.apache.spark.sql.functions.{col, lit}
3027
import org.junit.Assert.assertTrue
3128
import org.scalatest.BeforeAndAfter
3229
import org.scalatest.flatspec.AnyFlatSpec
3330
import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
34-
import org.slf4j.Logger
35-
import org.slf4j.LoggerFactory
31+
import org.slf4j.{Logger, LoggerFactory}
3632

3733
class AnalyzerTest extends AnyFlatSpec with BeforeAndAfter {
3834
@transient lazy val logger: Logger = LoggerFactory.getLogger(getClass)

spark/src/test/scala/ai/chronon/spark/test/bootstrap/DerivationTest.scala renamed to spark/src/test/scala/ai/chronon/spark/test/analyzer/DerivationTest.scala

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17-
package ai.chronon.spark.test.bootstrap
17+
package ai.chronon.spark.test.analyzer
1818

1919
import ai.chronon.api.Builders.Derivation
2020
import ai.chronon.api.Extensions._
@@ -24,17 +24,14 @@ import ai.chronon.online.Fetcher.Request
2424
import ai.chronon.online.MetadataStore
2525
import ai.chronon.spark.Extensions.DataframeOps
2626
import ai.chronon.spark._
27-
import ai.chronon.spark.test.OnlineUtils
28-
import ai.chronon.spark.test.SchemaEvolutionUtils
27+
import ai.chronon.spark.test.{OnlineUtils, SchemaEvolutionUtils}
28+
import ai.chronon.spark.test.bootstrap.BootstrapUtils
2929
import ai.chronon.spark.utils.MockApi
3030
import org.apache.spark.sql.SparkSession
3131
import org.apache.spark.sql.functions._
32-
import org.junit.Assert.assertEquals
33-
import org.junit.Assert.assertFalse
34-
import org.junit.Assert.assertTrue
32+
import org.junit.Assert.{assertEquals, assertFalse, assertTrue}
3533
import org.scalatest.flatspec.AnyFlatSpec
36-
import org.slf4j.Logger
37-
import org.slf4j.LoggerFactory
34+
import org.slf4j.{Logger, LoggerFactory}
3835

3936
import scala.concurrent.Await
4037
import scala.concurrent.duration.Duration

spark/src/test/scala/ai/chronon/spark/test/ChainingFetcherTest.scala renamed to spark/src/test/scala/ai/chronon/spark/test/fetcher/ChainingFetcherTest.scala

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,26 @@
1414
* limitations under the License.
1515
*/
1616

17-
package ai.chronon.spark.test
17+
package ai.chronon.spark.test.fetcher
1818

1919
import ai.chronon.aggregator.windowing.TsUtils
2020
import ai.chronon.api
2121
import ai.chronon.api.Constants.MetadataDataset
22-
import ai.chronon.api.Extensions.JoinOps
23-
import ai.chronon.api.Extensions.MetadataOps
22+
import ai.chronon.api.Extensions.{JoinOps, MetadataOps}
2423
import ai.chronon.api.ScalaJavaConversions._
2524
import ai.chronon.api._
2625
import ai.chronon.online.Fetcher.Request
27-
import ai.chronon.online.MetadataStore
28-
import ai.chronon.online.SparkConversions
26+
import ai.chronon.online.{MetadataStore, SparkConversions}
2927
import ai.chronon.spark.Extensions._
28+
import ai.chronon.spark.test.{OnlineUtils, TestUtils}
3029
import ai.chronon.spark.utils.MockApi
3130
import ai.chronon.spark.{Join => _, _}
32-
import org.apache.spark.sql.DataFrame
33-
import org.apache.spark.sql.Row
34-
import org.apache.spark.sql.SparkSession
3531
import org.apache.spark.sql.catalyst.expressions.GenericRow
3632
import org.apache.spark.sql.functions.lit
37-
import org.junit.Assert.assertEquals
38-
import org.junit.Assert.assertTrue
33+
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
34+
import org.junit.Assert.{assertEquals, assertTrue}
3935
import org.scalatest.flatspec.AnyFlatSpec
40-
import org.slf4j.Logger
41-
import org.slf4j.LoggerFactory
36+
import org.slf4j.{Logger, LoggerFactory}
4237

4338
import java.lang
4439
import java.util.TimeZone

0 commit comments

Comments (0)