Skip to content

Commit 7933f85

Browse files
committed
Drop Spark BigTable version to unlock DataProc submission
1 parent a4f162c commit 7933f85

File tree

3 files changed: +10 −2 lines changed

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ RUN curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SP
5757
&& tar xvzf spark.tgz --directory /opt/spark --strip-components 1 \
5858
&& rm -rf spark.tgz
5959

60+
# Add some additional custom jars for other connectors like BigTable etc
61+
RUN mkdir -p /opt/custom-jars && \
62+
curl -L "https://repo1.maven.org/maven2/com/google/cloud/spark/bigtable/spark-bigtable_2.12/0.2.1/spark-bigtable_2.12-0.2.1.jar" \
63+
-o /opt/custom-jars/spark-bigtable_2.12-0.2.1.jar && \
64+
curl -L "https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-slf4j-impl/2.20.0/log4j-slf4j-impl-2.20.0.jar" \
65+
-o /opt/custom-jars/log4j-slf4j-impl-2.20.0.jar
66+
6067
# Install python deps
6168
COPY quickstart/requirements.txt .
6269
RUN pip3 install -r requirements.txt

build.sbt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,6 @@ lazy val cloud_gcp = project
217217
libraryDependencies += "com.google.cloud.bigdataoss" % "gcsio" % "3.0.3", // need it for https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
218218
libraryDependencies += "io.circe" %% "circe-yaml" % "1.15.0",
219219
libraryDependencies += "com.google.cloud.spark" %% s"spark-bigquery-with-dependencies" % "0.41.0",
220-
libraryDependencies += "com.google.cloud.spark.bigtable" %% "spark-bigtable" % "0.2.1",
221220
libraryDependencies += "com.google.cloud.bigtable" % "bigtable-hbase-2.x" % "2.14.2",
222221
libraryDependencies ++= circe,
223222
libraryDependencies ++= avro,

quickstart/cloud_gcp/scripts/load_data.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ echo "GroupBy upload batch jobs completed successfully!"
3333

3434
echo "Uploading tables to KV Store"
3535
for dataset in purchases returns; do
36-
if ! spark-submit --driver-class-path "$CLASSPATH" --class ai.chronon.integrations.cloud_gcp.Spark2BigTableLoader \
36+
if ! spark-submit --driver-class-path "$CLASSPATH:/opt/custom-jars/*" \
37+
--jars "/opt/custom-jars/spark-bigtable_2.12-0.2.1.jar,/opt/custom-jars/log4j-slf4j-impl-2.20.0.jar" \
38+
--class ai.chronon.integrations.cloud_gcp.Spark2BigTableLoader \
3739
--master local[*] $CLOUD_GCP_JAR --table-name default.quickstart_${dataset}_v1_upload --dataset quickstart.${dataset}.v1 \
3840
--end-ds 2023-11-30 --project-id $GCP_PROJECT_ID --instance-id $GCP_INSTANCE_ID; then
3941
echo "Error: Failed to upload table to KV Store" >&2

0 commit comments

Comments (0)