Skip to content

Migrate from Play to Vert.x for Hub backend service #118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_scala_no_spark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,4 @@ jobs:
- name: Run service tests
run: |
export SBT_OPTS="-Xmx8G -Xms2G"
sbt "++ 2.12.18 service/test"
sbt "++ 2.12.18 service/test"
116 changes: 78 additions & 38 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ lazy val circeVersion = "0.14.9"
lazy val deltaVersion = "3.2.0"
lazy val slf4jApiVersion = "2.0.12"
lazy val logbackClassicVersion = "1.5.6"
lazy val vertxVersion = "4.5.10"

// skip tests on assembly - uncomment if builds become slow
// ThisBuild / assembly / test := {}
Expand All @@ -55,7 +56,7 @@ inThisBuild(
lazy val supportedVersions = List(scala_2_12) // List(scala211, scala212, scala213)

lazy val root = (project in file("."))
.aggregate(api, aggregator, online, spark, flink, cloud_gcp, cloud_aws, hub, service)
.aggregate(api, aggregator, online, spark, flink, cloud_gcp, cloud_aws, service_commons, service, hub)
.settings(name := "chronon")

val spark_sql = Seq(
Expand Down Expand Up @@ -100,6 +101,14 @@ val flink_all = Seq(
"org.apache.flink" % "flink-clients"
).map(_ % flink_1_17)

val vertx_java = Seq(
"io.vertx" % "vertx-core",
"io.vertx" % "vertx-web",
"io.vertx" % "vertx-config",
// wire up metrics using Micrometer and StatsD
"io.vertx" % "vertx-micrometer-metrics",
).map(_ % vertxVersion)

val avro = Seq("org.apache.avro" % "avro" % "1.11.3")

lazy val api = project
Expand Down Expand Up @@ -249,68 +258,99 @@ lazy val frontend = (project in file("frontend"))
}
)

// We use Play 2.x (version defined in plugins.sbt) as many of our modules are still on Scala 2.12;
// building interop between one module solely on 2.13 and others on 2.12 is painful.
lazy val hub = (project in file("hub"))
.enablePlugins(PlayScala)
.dependsOn(cloud_aws, spark)
lazy val service_commons = (project in file("service_commons"))
.dependsOn(online)
.settings(
name := "hub",
libraryDependencies ++= vertx_java,
libraryDependencies ++= Seq(
guice,
"org.scalatestplus.play" %% "scalatestplus-play" % "5.1.0" % Test,
"org.scalatestplus" %% "mockito-3-4" % "3.2.10.0" % "test",
"org.scala-lang.modules" %% "scala-xml" % "2.1.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "2.3.0",
"org.scala-lang.modules" %% "scala-java8-compat" % "1.0.2"
),
libraryDependencies ++= circe,
libraryDependencySchemes ++= Seq(
"org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always,
"org.scala-lang.modules" %% "scala-parser-combinators" % VersionScheme.Always,
"org.scala-lang.modules" %% "scala-java8-compat" % VersionScheme.Always
),
excludeDependencies ++= Seq(
ExclusionRule(organization = "org.slf4j", name = "slf4j-log4j12"),
ExclusionRule(organization = "log4j", name = "log4j"),
ExclusionRule(organization = "org.apache.logging.log4j", name = "log4j-to-slf4j"),
ExclusionRule("org.apache.logging.log4j", "log4j-slf4j-impl"),
ExclusionRule("org.apache.logging.log4j", "log4j-core"),
ExclusionRule("org.apache.logging.log4j", "log4j-api")
),
// Ensure consistent versions of logging libraries
dependencyOverrides ++= Seq(
"ch.qos.logback" % "logback-classic" % logbackClassicVersion,
"org.slf4j" % "slf4j-api" % slf4jApiVersion,
"ch.qos.logback" % "logback-classic" % logbackClassicVersion
)
"com.typesafe" % "config" % "1.4.3",
// force netty versions -> without this we conflict with the versions pulled in from
// our online module's spark deps which causes the web-app to not serve up content
"io.netty" % "netty-all" % "4.1.111.Final",
// wire up metrics using Micrometer and StatsD
"io.micrometer" % "micrometer-registry-statsd" % "1.13.6",
),
)

lazy val service = (project in file("service"))
.dependsOn(online)
.dependsOn(online, service_commons)
.settings(
assembly / assemblyJarName := s"${name.value}-${version.value}.jar",
assembly / artifact := {
val art = (assembly / artifact).value
art.withClassifier(Some("assembly"))
},
addArtifact(assembly / artifact, assembly),
libraryDependencies ++= vertx_java,
libraryDependencies ++= Seq(
"io.vertx" % "vertx-core" % "4.5.10",
"io.vertx" % "vertx-web" % "4.5.10",
"io.vertx" % "vertx-config" % "4.5.10",
"ch.qos.logback" % "logback-classic" % logbackClassicVersion,
"org.slf4j" % "slf4j-api" % slf4jApiVersion,
"com.typesafe" % "config" % "1.4.3",
// force netty versions -> without this we conflict with the versions pulled in from
// our online module's spark deps which causes the web-app to not serve up content
"io.netty" % "netty-all" % "4.1.111.Final",
// wire up metrics using Micrometer and StatsD
"io.vertx" % "vertx-micrometer-metrics" % "4.5.10",
"io.micrometer" % "micrometer-registry-statsd" % "1.13.6",
"junit" % "junit" % "4.13.2" % Test,
"com.novocode" % "junit-interface" % "0.11" % Test,
"org.mockito" % "mockito-core" % "5.12.0" % Test,
"io.vertx" % "vertx-unit" % "4.5.10" % Test,
"io.vertx" % "vertx-unit" % vertxVersion % Test,
),
// Assembly settings
assembly / assemblyJarName := s"${name.value}-${version.value}.jar",

// Main class configuration
// We use a custom launcher to help us wire up our statsd metrics
Compile / mainClass := Some("ai.chronon.service.ChrononServiceLauncher"),
assembly / mainClass := Some("ai.chronon.service.ChrononServiceLauncher"),

// Merge strategy for assembly
assembly / assemblyMergeStrategy := {
case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard
case PathList("META-INF", xs @ _*) => MergeStrategy.first
case PathList("javax", "activation", xs @ _*) => MergeStrategy.first
case PathList("org", "apache", "logging", xs @ _*) => MergeStrategy.first
case PathList("org", "slf4j", xs @ _*) => MergeStrategy.first
case "application.conf" => MergeStrategy.concat
case "reference.conf" => MergeStrategy.concat
case x =>
val oldStrategy = (assembly / assemblyMergeStrategy).value
oldStrategy(x)
}
)

lazy val hub = (project in file("hub"))
.dependsOn(online, service_commons, spark)
.settings(
assembly / assemblyJarName := s"${name.value}-${version.value}.jar",
assembly / artifact := {
val art = (assembly / artifact).value
art.withClassifier(Some("assembly"))
},
addArtifact(assembly / artifact, assembly),
libraryDependencies ++= vertx_java,
libraryDependencies ++= circe,
libraryDependencies ++= Seq(
"ch.qos.logback" % "logback-classic" % logbackClassicVersion,
"org.slf4j" % "slf4j-api" % slf4jApiVersion,
"com.typesafe" % "config" % "1.4.3",
// force netty versions -> without this we conflict with the versions pulled in from
// our online module's spark deps which causes the web-app to not serve up content
"io.netty" % "netty-all" % "4.1.111.Final",
// wire up metrics using Micrometer and StatsD
"io.micrometer" % "micrometer-registry-statsd" % "1.13.6",

// need this to prevent a NoClassDefFoundError on org/json4s/Formats
"org.json4s" %% "json4s-core" % "3.7.0-M11",

Comment on lines +345 to +347
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use Stable Version for json4s-core Dependency

The dependency "org.json4s" %% "json4s-core" % "3.7.0-M11" is a milestone release. Milestone versions may not be stable and can introduce unexpected issues. It's recommended to use the latest stable release of json4s-core to ensure reliability.

Update the dependency to the latest stable version. For example:

- "org.json4s" %% "json4s-core" % "3.7.0-M11",
+ "org.json4s" %% "json4s-core" % "3.6.11",

Please verify the compatibility with your project.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// need this to prevent a NoClassDef error on org/json4s/Formats
"org.json4s" %% "json4s-core" % "3.7.0-M11",
// need this to prevent a NoClassDef error on org/json4s/Formats
"org.json4s" %% "json4s-core" % "3.6.11",

"junit" % "junit" % "4.13.2" % Test,
"com.novocode" % "junit-interface" % "0.11" % Test,
"org.mockito" % "mockito-core" % "5.12.0" % Test,
"io.vertx" % "vertx-unit" % vertxVersion % Test,
"org.scalatest" %% "scalatest" % "3.2.19" % "test",
"org.scalatestplus" %% "mockito-3-4" % "3.2.10.0" % "test",
),
// Assembly settings
assembly / assemblyJarName := s"${name.value}-${version.value}.jar",
Expand Down
6 changes: 2 additions & 4 deletions docker-init/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ COPY --from=spark-source /opt/bitnami/spark/bin /opt/spark/bin
ENV CLASSPATH=/opt/spark/jars/*

# Copy the hub assembly jar and its config and set up the app directory structure
COPY ./hub/target/universal/hub-0.1.0-SNAPSHOT.zip /app
RUN unzip hub-0.1.0-SNAPSHOT.zip -d /app/hub && \
cp -r hub/hub-0.1.0-SNAPSHOT/* hub/. && \
rm -rf hub/hub-0.1.0-SNAPSHOT hub-0.1.0-SNAPSHOT.zip
COPY ./hub/target/scala-2.12/hub-0.1.0-SNAPSHOT.jar /app
COPY ./docker-init/hub/config.json /app

EXPOSE 9000

Expand Down
8 changes: 0 additions & 8 deletions docker-init/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ function print_usage() {
echo " --clean Clean and build all modules"
echo " --spark Build Spark modules only"
echo " --frontend Build frontend modules only"
echo " --hub Build Hub modules only"
echo " -h, --help Show this help message"
}

Expand Down Expand Up @@ -63,7 +62,6 @@ fi
if [ "$BUILD_ALL" = true ]; then
echo "Building all modules..."
sbt assembly
sbt dist
sbt "project frontend" buildFrontend
fi

Expand All @@ -73,12 +71,6 @@ if [ "$BUILD_SPARK" = true ]; then
sbt assembly
fi

# Build Hub modules
if [ "$BUILD_HUB" = true ]; then
echo "Building Hub distribution..."
sbt dist
fi

# Build frontend
if [ "$BUILD_FRONTEND" = true ]; then
echo "Building frontend distribution..."
Expand Down
8 changes: 8 additions & 0 deletions docker-init/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,19 @@ services:
- SPARK_SSL_ENABLED=no
- SPARK_USER=spark

statsd:
image: node:latest
ports:
- "8125:8125/udp"
command: sh -c "npm install -g statsd-logger && statsd-logger > /dev/null 2>&1"
Comment on lines +44 to +48
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve the statsd service configuration for better stability and observability.

Several improvements can be made to the statsd service configuration:

   statsd:
-    image: node:latest
+    image: node:20.11.0  # or another specific version
     ports:
       - "8125:8125/udp"
-    command: sh -c "npm install -g statsd-logger && statsd-logger > /dev/null 2>&1"
+    command: statsd-logger

Consider:

  1. Using a specific node version instead of 'latest' for better reproducibility
  2. Pre-installing statsd-logger in a custom Dockerfile rather than at runtime
  3. Allowing service logs for better debugging

Committable suggestion skipped: line range outside the PR's diff.


app:
build:
context: ..
dockerfile: docker-init/Dockerfile
depends_on:
- dynamo
- statsd
environment:
- DYNAMO_ENDPOINT=http://dynamo:8000
- AWS_DEFAULT_REGION=us-west-2
Expand All @@ -57,6 +64,7 @@ services:
- SPARK_JAR=/app/cli/spark.jar
- CLOUD_AWS_JAR=/app/cli/cloud_aws.jar
- ONLINE_CLASS=ai.chronon.integrations.aws.AwsApiImpl
- STATSD_HOST=statsd
ports:
- "9000:9000"
healthcheck:
Expand Down
5 changes: 5 additions & 0 deletions docker-init/hub/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"online.jar": "/app/cli/cloud_aws.jar",
"online.class": "ai.chronon.integrations.aws.AwsApiImpl",
"online.api.props": {}
}
2 changes: 1 addition & 1 deletion docker-init/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,4 @@ start_time=$(date +%s)
# Add these java options as without them we hit the below error:
# throws java.lang.ClassFormatError accessible: module java.base does not "opens java.lang" to unnamed module @36328710
export JAVA_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED"
exec "./hub/bin/hub"
exec java -jar hub-0.1.0-SNAPSHOT.jar run ai.chronon.hub.HubVerticle -Dserver.port=9000 -Dai.chronon.metrics.host=$STATSD_HOST -conf config.json
15 changes: 0 additions & 15 deletions hub/app/controllers/ApplicationController.scala

This file was deleted.

60 changes: 0 additions & 60 deletions hub/app/controllers/InMemKVStoreController.scala

This file was deleted.

54 changes: 0 additions & 54 deletions hub/app/controllers/JoinController.scala

This file was deleted.

Loading
Loading