
Commit 8cd16dd

Connect drift metrics computation in Spark with Hub for serving to frontend (#95)
## Summary

Builds on a couple of the summary computation PRs and data generation to wire things up so that Hub can serve them.

* Yanked out the mock-data based endpoints (model perf / drift, join & feature skew) - decided it would be confusing to have a mix of mock and generated data, so we only serve the generated data.
* Dropped a few of the scripts introduced in #87. We bring up our containers the usual way, and we have a script `load_summaries.sh` that we can trigger to leverage the existing app container to load data.
* DDB ingestion was taking too long and we were dropping a lot of data due to rejected execution exceptions. To unblock for now, we've gone with an approach of making a bulk put HTTP call from the ObservabilityDemo app -> Hub, with Hub utilizing an InMemoryKV store to persist and serve up features.
* Added an endpoint to serve the joins that are configured, as we've switched away from the model-based world.

There's still an issue to resolve around fetching individual feature series data. Once that is resolved, we can switch this PR out of WIP mode.

To test / run, start up our docker containers:
```
$ docker-compose -f docker-init/compose.yaml up --build
...
```

In a different terminal, load data:
```
$ ./docker-init/demo/load_summaries.sh
Done uploading summaries! 🥳
```

You can now curl join & feature time series data.

Join drift (null ratios):
```
curl -X GET 'http://localhost:9000/api/v1/join/risk.user_transactions.txn_join/timeseries?startTs=1673308800000&endTs=1674172800000&metricType=drift&metrics=null&offset=10h&algorithm=psi'
```

Join drift (value drift):
```
curl -X GET 'http://localhost:9000/api/v1/join/risk.user_transactions.txn_join/timeseries?startTs=1673308800000&endTs=1674172800000&metricType=drift&metrics=value&offset=10h&algorithm=psi'
```

Feature drift:
```
curl -X GET 'http://localhost:9000/api/v1/join/risk.user_transactions.txn_join/feature/dim_user_account_type/timeseries?startTs=1673308800000&endTs=1674172800000&metricType=drift&metrics=value&offset=1D&algorithm=psi&granularity=aggregates'
```

Feature summaries:
```
curl -X GET 'http://localhost:9000/api/v1/join/risk.user_transactions.txn_join/feature/dim_user_account_type/timeseries?startTs=1673308800000&endTs=1674172800000&metricType=drift&metrics=value&offset=1D&algorithm=psi&granularity=percentile'
```

Join metadata:
```
curl -X GET 'http://localhost:9000/api/v1/joins'
curl -X GET 'http://localhost:9000/api/v1/join/risk.user_transactions.txn_join'
```

## Checklist
- [X] Added Unit Tests
- [ ] Covered by existing CI
- [X] Integration tested
- [ ] Documentation update

## Summary by CodeRabbit

## Release Notes

- **New Features**
  - Introduced a new `JoinController` for managing joins with pagination support.
  - Added functionality for an in-memory key-value store with bulk data upload capabilities.
  - Implemented observability demo data loading within a Spark application.
  - Added a new `HTTPKVStore` class for remote key-value store interactions over HTTP.
- **Improvements**
  - Enhanced the `ModelController` and `SearchController` to align with the new join data structure.
  - Updated the `TimeSeriesController` to support asynchronous operations and improved error handling.
  - Refined dependency management in the build configuration for better clarity and maintainability.
  - Updated API routes to include new endpoints for listing and retrieving joins.
  - Updated configuration to replace the `DynamoDBModule` with `ModelStoreModule`, adding `InMemoryKVStoreModule` and `DriftStoreModule`.
- **Documentation**
  - Revised README instructions for Docker container setup and demo data loading.
  - Updated API routes documentation to reflect new endpoints for joins and in-memory data operations.
- **Bug Fixes**
  - Resolved issues related to error handling in various controllers and improved logging for better traceability.

---------

Co-authored-by: nikhil-zlai <[email protected]>
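
As context for the bulk-put path described in the summary (ObservabilityDemo app -> Hub -> in-memory KV store), here is a minimal, hedged sketch of what the client side of that HTTP call can look like: it serializes a batch of `PutRequest`s to JSON (byte arrays Base64-encoded, reusing the `PutRequestCodec` that Hub defines later in this commit) and POSTs them to Hub via the sttp client added to the `online` module. The `BulkPutSketch` name, the `hubBaseUrl` parameter, and the `/api/v1/dataset/data` route are illustrative assumptions; the actual `HTTPKVStore` in this PR may differ.

```scala
import ai.chronon.online.KVStore.PutRequest
import controllers.PutRequestCodec._ // Base64-encodes the key/value byte arrays
import io.circe.syntax._
import sttp.client3._

object BulkPutSketch {
  private val backend = HttpURLConnectionBackend()

  // POST a batch of PutRequests to Hub as a single JSON array.
  // Hub's InMemKVStoreController decodes the same shape via decode[Array[PutRequest]].
  def bulkPut(hubBaseUrl: String, requests: Seq[PutRequest]): Boolean = {
    val response = basicRequest
      .post(uri"$hubBaseUrl/api/v1/dataset/data") // hypothetical route
      .contentType("application/json")
      .body(requests.asJson.noSpaces)
      .send(backend)
    response.code.isSuccess
  }
}
```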
1 parent 0fb2e3d commit 8cd16dd

34 files changed (+908, -457 lines)

.github/workflows/test_scala_no_spark.yaml

Lines changed: 6 additions & 1 deletion

```diff
@@ -60,4 +60,9 @@ jobs:
 
       - name: Run api tests
         run: |
-          sbt "++ 2.12.18 api/test"
+          sbt "++ 2.12.18 api/test"
+
+      - name: Run hub tests
+        run: |
+          export SBT_OPTS="-Xmx8G -Xms2G"
+          sbt "++ 2.12.18 hub/test"
```

build.sbt

Lines changed: 15 additions & 5 deletions

```diff
@@ -80,6 +80,13 @@ val jackson = Seq(
   "com.fasterxml.jackson.module" %% "jackson-module-scala"
 ).map(_ % jackson_2_15)
 
+// Circe is used to ser / deser case class payloads for the Hub Play webservice
+val circe = Seq(
+  "io.circe" %% "circe-core",
+  "io.circe" %% "circe-generic",
+  "io.circe" %% "circe-parser",
+).map(_ % circeVersion)
+
 val flink_all = Seq(
   "org.apache.flink" %% "flink-streaming-scala",
   "org.apache.flink" % "flink-metrics-dropwizard",
@@ -129,6 +136,8 @@ lazy val online = project
       "com.github.ben-manes.caffeine" % "caffeine" % "3.1.8"
     ),
     libraryDependencies ++= jackson,
+    // dep needed for HTTPKvStore - yank when we rip this out
+    libraryDependencies += "com.softwaremill.sttp.client3" %% "core" % "3.9.7",
     libraryDependencies ++= spark_all.map(_ % "provided"),
     libraryDependencies ++= flink_all.map(_ % "provided")
   )
@@ -236,20 +245,18 @@ lazy val frontend = (project in file("frontend"))
 // build interop between one module solely on 2.13 and others on 2.12 is painful
 lazy val hub = (project in file("hub"))
   .enablePlugins(PlayScala)
-  .dependsOn(cloud_aws)
+  .dependsOn(cloud_aws, spark)
   .settings(
     name := "hub",
     libraryDependencies ++= Seq(
       guice,
       "org.scalatestplus.play" %% "scalatestplus-play" % "5.1.0" % Test,
       "org.scalatestplus" %% "mockito-3-4" % "3.2.10.0" % "test",
-      "io.circe" %% "circe-core" % circeVersion,
-      "io.circe" %% "circe-generic" % circeVersion,
-      "io.circe" %% "circe-parser" % circeVersion,
       "org.scala-lang.modules" %% "scala-xml" % "2.1.0",
       "org.scala-lang.modules" %% "scala-parser-combinators" % "2.3.0",
       "org.scala-lang.modules" %% "scala-java8-compat" % "1.0.2"
     ),
+    libraryDependencies ++= circe,
     libraryDependencySchemes ++= Seq(
       "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always,
       "org.scala-lang.modules" %% "scala-parser-combinators" % VersionScheme.Always,
@@ -258,7 +265,10 @@ lazy val hub = (project in file("hub"))
     excludeDependencies ++= Seq(
       ExclusionRule(organization = "org.slf4j", name = "slf4j-log4j12"),
       ExclusionRule(organization = "log4j", name = "log4j"),
-      ExclusionRule(organization = "org.apache.logging.log4j", name = "log4j-to-slf4j")
+      ExclusionRule(organization = "org.apache.logging.log4j", name = "log4j-to-slf4j"),
+      ExclusionRule("org.apache.logging.log4j", "log4j-slf4j-impl"),
+      ExclusionRule("org.apache.logging.log4j", "log4j-core"),
+      ExclusionRule("org.apache.logging.log4j", "log4j-api")
     ),
     // Ensure consistent versions of logging libraries
     dependencyOverrides ++= Seq(
```

docker-init/Dockerfile

Lines changed: 1 addition & 0 deletions

```diff
@@ -43,6 +43,7 @@ ENV CHRONON_DRIVER_JAR="/app/cli/spark.jar"
 # Set up Spark dependencies to help with launching CLI
 # Copy Spark JARs from the Bitnami image
 COPY --from=spark-source /opt/bitnami/spark/jars /opt/spark/jars
+COPY --from=spark-source /opt/bitnami/spark/bin /opt/spark/bin
 
 # Add all Spark JARs to the classpath
 ENV CLASSPATH=/opt/spark/jars/*
```

docker-init/demo/README.md

Lines changed: 20 additions & 0 deletions

```diff
@@ -1,5 +1,25 @@
+# Populate Observability Demo Data
+To populate the observability demo data:
+* Launch the set of docker containers:
+```bash
+~/workspace/chronon $ docker-compose -f docker-init/compose.yaml up --build
+...
+app-1  | [info] 2024-11-26 05:10:45,758 [main] INFO play.api.Play - Application started (Prod) (no global state)
+app-1  | [info] 2024-11-26 05:10:45,958 [main] INFO play.core.server.AkkaHttpServer - Listening for HTTP on /[0:0:0:0:0:0:0:0]:9000
+```
+(you can skip the --build if you don't wish to rebuild your code)
+
+Now you can trigger the script to load summary data:
+```bash
+~/workspace/chronon $ docker-init/demo/load_summaries.sh
+...
+Done uploading summaries! 🥳
+```
+
+# Streamlit local experimentation
 run build.sh once, and you can repeatedly exec to quickly visualize
 
 In first terminal: `sbt spark/assembly`
 In second terminal: `./run.sh` to load the built jar and serve the data on localhost:8181
 In third terminal: `streamlit run viz.py`
+
```

docker-init/demo/load_summaries.sh

Lines changed: 12 additions & 0 deletions

```diff
@@ -0,0 +1,12 @@
+# Kick off the ObsDemo spark job in the app container
+
+docker-compose -f docker-init/compose.yaml exec app /opt/spark/bin/spark-submit \
+  --master "local[*]" \
+  --driver-memory 8g \
+  --conf "spark.driver.maxResultSize=6g" \
+  --conf "spark.driver.memory=8g" \
+  --driver-class-path "/opt/spark/jars/*:/app/cli/*" \
+  --conf "spark.driver.host=localhost" \
+  --conf "spark.driver.bindAddress=0.0.0.0" \
+  --class ai.chronon.spark.scripts.ObservabilityDemoDataLoader \
+  /app/cli/spark.jar
```

docker-init/demo/log4j2.properties

Lines changed: 0 additions & 17 deletions
This file was deleted.

docker-init/start.sh

Lines changed: 0 additions & 17 deletions

```diff
@@ -39,23 +39,6 @@ echo "DynamoDB Table created successfully!"
 
 start_time=$(date +%s)
 
-if ! java \
-    --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
-    --add-opens=java.base/sun.security.action=ALL-UNNAMED \
-    -cp $SPARK_JAR:$CLASSPATH ai.chronon.spark.Driver summarize-and-upload \
-    --online-jar=$CLOUD_AWS_JAR \
-    --online-class=$ONLINE_CLASS \
-    --parquet-path="$(pwd)/drift_data" \
-    --conf-path=/chronon_sample/production/ \
-    --time-column=transaction_time; then
-  echo "Error: Failed to load summary data into DynamoDB" >&2
-  exit 1
-else
-  end_time=$(date +%s)
-  elapsed_time=$((end_time - start_time))
-  echo "Summary load completed successfully! Took $elapsed_time seconds."
-fi
-
 # Add these java options as without them we hit the below error:
 # throws java.lang.ClassFormatError accessible: module java.base does not "opens java.lang" to unnamed module @36328710
 export JAVA_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED"
```

hub/app/controllers/InMemKVStoreController.scala

Lines changed: 60 additions & 0 deletions

```scala
package controllers

import ai.chronon.online.KVStore
import ai.chronon.online.KVStore.PutRequest
import io.circe.Codec
import io.circe.Decoder
import io.circe.Encoder
import io.circe.generic.semiauto.deriveCodec
import io.circe.parser.decode
import play.api.Logger
import play.api.mvc
import play.api.mvc.BaseController
import play.api.mvc.ControllerComponents
import play.api.mvc.RawBuffer

import java.util.Base64
import javax.inject.Inject
import scala.concurrent.ExecutionContext
import scala.concurrent.Future

class InMemKVStoreController @Inject() (val controllerComponents: ControllerComponents, kvStore: KVStore)(implicit
    ec: ExecutionContext)
    extends BaseController {

  import PutRequestCodec._

  val logger: Logger = Logger(this.getClass)

  def bulkPut(): mvc.Action[RawBuffer] =
    Action(parse.raw).async { request =>
      request.body.asBytes() match {
        case Some(bytes) =>
          decode[Array[PutRequest]](bytes.utf8String) match {
            case Right(putRequests) =>
              logger.debug(s"Attempting a bulkPut with ${putRequests.length} items")
              val resultFuture = kvStore.multiPut(putRequests)
              resultFuture.map { responses =>
                if (responses.contains(false)) {
                  logger.warn("Some write failures encountered")
                }
                Ok("Success")
              }
            case Left(error) => Future.successful(BadRequest(error.getMessage))
          }
        case None => Future.successful(BadRequest("Empty body"))
      }
    }
}

object PutRequestCodec {
  // Custom codec for byte arrays using Base64
  implicit val byteArrayEncoder: Encoder[Array[Byte]] =
    Encoder.encodeString.contramap[Array[Byte]](Base64.getEncoder.encodeToString)

  implicit val byteArrayDecoder: Decoder[Array[Byte]] =
    Decoder.decodeString.map(Base64.getDecoder.decode)

  // Derive codec for PutRequest
  implicit val putRequestCodec: Codec[PutRequest] = deriveCodec[PutRequest]
}
```
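
A quick sanity check on the Base64 codec above, as an illustrative round-trip that is not part of the commit: encoding an `Array[Byte]` yields a Base64 JSON string, and decoding recovers the original bytes.

```scala
import controllers.PutRequestCodec._
import io.circe.parser.decode
import io.circe.syntax._

import java.nio.charset.StandardCharsets

object PutRequestCodecRoundTrip extends App {
  val original = "hello".getBytes(StandardCharsets.UTF_8)

  // byteArrayEncoder turns the raw bytes into a Base64 JSON string: "aGVsbG8="
  val json = original.asJson.noSpaces

  // byteArrayDecoder reverses the Base64 step, recovering the original bytes
  val decoded = decode[Array[Byte]](json).toOption.get
  assert(decoded.sameElements(original))
}
```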

hub/app/controllers/JoinController.scala

Lines changed: 54 additions & 0 deletions

```scala
package controllers

import io.circe.generic.auto._
import io.circe.syntax._
import model.ListJoinResponse
import play.api.mvc._
import store.MonitoringModelStore

import javax.inject._

/**
 * Controller for the Zipline Join entities
 */
@Singleton
class JoinController @Inject() (val controllerComponents: ControllerComponents, monitoringStore: MonitoringModelStore)
    extends BaseController
    with Paginate {

  /**
   * Powers the /api/v1/joins endpoint. Returns a list of joins
   * @param offset - For pagination. We skip over offset entries before returning results
   * @param limit - Number of elements to return
   */
  def list(offset: Option[Int], limit: Option[Int]): Action[AnyContent] =
    Action { implicit request: Request[AnyContent] =>
      // Default values if the parameters are not provided
      val offsetValue = offset.getOrElse(defaultOffset)
      val limitValue = limit.map(l => math.min(l, maxLimit)).getOrElse(defaultLimit)

      if (offsetValue < 0) {
        BadRequest("Invalid offset - expect a positive number")
      } else if (limitValue < 0) {
        BadRequest("Invalid limit - expect a positive number")
      } else {
        val joins = monitoringStore.getJoins
        val paginatedResults = paginateResults(joins, offsetValue, limitValue)
        val json = ListJoinResponse(offsetValue, paginatedResults).asJson.noSpaces
        Ok(json)
      }
    }

  /**
   * Returns a specific join by name
   */
  def get(name: String): Action[AnyContent] = {
    Action { implicit request: Request[AnyContent] =>
      val maybeJoin = monitoringStore.getJoins.find(j => j.name.equalsIgnoreCase(name))
      maybeJoin match {
        case None       => NotFound(s"Join: $name wasn't found")
        case Some(join) => Ok(join.asJson.noSpaces)
      }
    }
  }
}
```

hub/app/controllers/ModelController.scala

Lines changed: 2 additions & 3 deletions

```diff
@@ -4,16 +4,15 @@ import io.circe.generic.auto._
 import io.circe.syntax._
 import model.ListModelResponse
 import play.api.mvc._
-import store.DynamoDBMonitoringStore
+import store.MonitoringModelStore
 
 import javax.inject._
 
 /**
  * Controller for the Zipline models entities
  */
 @Singleton
-class ModelController @Inject() (val controllerComponents: ControllerComponents,
-                                 monitoringStore: DynamoDBMonitoringStore)
+class ModelController @Inject() (val controllerComponents: ControllerComponents, monitoringStore: MonitoringModelStore)
     extends BaseController
     with Paginate {
```

hub/app/controllers/Paginate.scala

Lines changed: 1 addition & 3 deletions

```diff
@@ -1,13 +1,11 @@
 package controllers
 
-import model.Model
-
 trait Paginate {
   val defaultOffset = 0
   val defaultLimit = 10
   val maxLimit = 100
 
-  def paginateResults(results: Seq[Model], offset: Int, limit: Int): Seq[Model] = {
+  def paginateResults[T](results: Seq[T], offset: Int, limit: Int): Seq[T] = {
     results.slice(offset, offset + limit)
   }
 }
```
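
Since `paginateResults` is now generic over `T`, the same slicing applies to joins, models, or anything else. A small illustrative sketch (the values are made up):

```scala
import controllers.Paginate

object PaginateSketch extends Paginate {
  def main(args: Array[String]): Unit = {
    val names = (1 to 25).map(i => s"join_$i")

    // Skip the first 10 entries, then return up to `limit` results: join_11 .. join_20
    println(paginateResults(names, offset = 10, limit = 10))

    // Slicing past the end simply yields an empty sequence
    println(paginateResults(names, offset = 30, limit = 10))
  }
}
```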

hub/app/controllers/SearchController.scala

Lines changed: 11 additions & 12 deletions

```diff
@@ -2,24 +2,23 @@ package controllers
 
 import io.circe.generic.auto._
 import io.circe.syntax._
-import model.Model
-import model.SearchModelResponse
+import model.Join
+import model.SearchJoinResponse
 import play.api.mvc._
-import store.DynamoDBMonitoringStore
+import store.MonitoringModelStore
 
 import javax.inject._
 
 /**
  * Controller to power search related APIs
  */
-class SearchController @Inject() (val controllerComponents: ControllerComponents,
-                                  monitoringStore: DynamoDBMonitoringStore)
+class SearchController @Inject() (val controllerComponents: ControllerComponents, monitoringStore: MonitoringModelStore)
     extends BaseController
     with Paginate {
 
   /**
-   * Powers the /api/v1/search endpoint. Returns a list of models
-   * @param term - Search term to search for (currently we only support searching model names)
+   * Powers the /api/v1/search endpoint. Returns a list of joins
+   * @param term - Search term to search for (currently we only support searching join names)
    * @param offset - For pagination. We skip over offset entries before returning results
    * @param limit - Number of elements to return
    */
@@ -36,14 +35,14 @@ class SearchController @Inject() (val controllerComponents: ControllerComponents
     } else {
       val searchResults = searchRegistry(term)
       val paginatedResults = paginateResults(searchResults, offsetValue, limitValue)
-      val json = SearchModelResponse(offsetValue, paginatedResults).asJson.noSpaces
+      val json = SearchJoinResponse(offsetValue, paginatedResults).asJson.noSpaces
       Ok(json)
     }
   }
 
-  // a trivial search where we check the model name for similarity with the search term
-  private def searchRegistry(term: String): Seq[Model] = {
-    val models = monitoringStore.getModels
-    models.filter(m => m.name.contains(term))
+  // a trivial search where we check the join name for similarity with the search term
+  private def searchRegistry(term: String): Seq[Join] = {
+    val joins = monitoringStore.getJoins
+    joins.filter(j => j.name.contains(term))
   }
 }
```
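
For clarity, the search itself is a plain case-sensitive substring match on join names. A tiny illustrative snippet (all names except the one used in the curl examples above are made up):

```scala
object SearchSketch extends App {
  val joins = Seq(
    "risk.user_transactions.txn_join",     // appears in the curl examples above
    "risk.merchant_transactions.txn_join", // made up
    "growth.signups.signup_join"           // made up
  )

  // "txn" matches the first two names; the signup join is filtered out
  println(joins.filter(_.contains("txn")))
}
```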
