
Commit 5c3abf2

Rework Flink Schema providers to use existing SerDe and Mutation interfaces (#751)
## Summary

Refactor the schema-provider-shaped code to:

* Use the existing SerDe class interfaces we have.
* Work with Mutation types via the SerDe classes. The primary shuffling is pulling the Avro deserialization out of the existing BaseAvroDeserializationSchema and delegating it to the SerDe to get a Mutation back, as well as shifting things a bit to call CatalystUtil with the Mutation Array[Any] types (see the sketch after this section).
* Provide rails for users to supply a custom schema provider. I used this to test a version of the beacon app out in canary - I'll put up a separate PR for the test job in a follow-up.
* Other misc piled-up fixes - check that GBUs don't compute empty results; turn our OTel metrics code off by default to reduce log spam.

## Checklist

- [X] Added Unit Tests
- [X] Covered by existing CI
- [X] Integration tested -- tested via canary on our env / customer env; confirmed we pass the validation piece, see the jobs come up, and write data out to BT.
- [ ] Documentation update

## Summary by CodeRabbit

- **New Features**
  - Added Avro serialization and deserialization support for online data processing.
  - Introduced flexible schema registry and custom schema provider selection for Flink streaming sources.
- **Refactor**
  - Unified and renamed the serialization/deserialization interface to `SerDe` across modules.
  - Centralized and simplified schema provider and deserialization logic for Flink jobs.
  - Improved visibility and type safety for internal utilities.
- **Bug Fixes**
  - Enhanced error handling and robustness in metrics initialization and deserialization workflows.
- **Tests**
  - Added and updated tests for Avro deserialization and schema registry integration.
  - Removed outdated or redundant test suites.
- **Chores**
  - Updated external dependencies to include Avro support.
  - Cleaned up unused files and legacy code.
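As a rough orientation for the refactor described above, the sketch below strings the pieces together: a `SerDe` decodes the raw payload into a `Mutation`, and the decoded `Array[Any]` values are handed to the Spark expression evaluation via the new `performSql(Array[Any])` overload. The `fromBytes` method and the `after` field are assumptions about the `SerDe`/`Mutation` interfaces (only the types themselves appear in this diff), so treat this as an illustration rather than the actual deserialization schema implementation.

```scala
import ai.chronon.flink.SparkExpressionEval
import ai.chronon.online.serde.SerDe

// Illustrative sketch only. `serDe.fromBytes` and `mutation.after` are assumed member names;
// the real wiring lives in the new deser package (SourceProjectionDeserializationSchema et al).
class MutationProjectionSketch[T](serDe: SerDe, eval: SparkExpressionEval[T]) {

  def deserializeAndProject(payload: Array[Byte]): Seq[Map[String, Any]] = {
    // 1. The SerDe owns the wire format (e.g. Avro) and hands back a Chronon Mutation.
    val mutation = serDe.fromBytes(payload)

    // 2. The Mutation carries the decoded row as Array[Any]; feed it to CatalystUtil
    //    through SparkExpressionEval.performSql(Array[Any]) to apply the GroupBy's expressions.
    Option(mutation.after).map(eval.performSql).getOrElse(Seq.empty)
  }
}
```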
1 parent f21d0cc commit 5c3abf2

27 files changed (+569 / -538 lines)

cloud_aws/src/main/scala/ai/chronon/integrations/aws/AwsApiImpl.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -42,7 +42,7 @@ class AwsApiImpl(conf: Map[String, String]) extends Api(conf) {
   /** The stream decoder method in the AwsApi is currently unimplemented. This needs to be implemented before
     * we can spin up the Aws streaming Chronon stack
     */
-  override def streamDecoder(groupByServingInfoParsed: GroupByServingInfoParsed): Serde = ???
+  override def streamDecoder(groupByServingInfoParsed: GroupByServingInfoParsed): SerDe = ???
 
   /** The external registry extension is currently unimplemented. We'll need to implement this prior to spinning up
     * a fully functional Chronon serving stack in Aws
```

cloud_gcp/BUILD.bazel

Lines changed: 1 addition & 0 deletions
```diff
@@ -35,6 +35,7 @@ shared_deps = [
     maven_artifact("ch.qos.reload4j:reload4j"),
     maven_artifact("org.threeten:threetenbp"),
     maven_artifact("org.apache.kafka:kafka-clients"),
+    maven_artifact("org.apache.avro:avro"),
     maven_artifact("com.google.cloud.spark:spark-3.5-bigquery"),
     maven_artifact_with_suffix("org.apache.iceberg:iceberg-spark-runtime-3.5"),
     maven_artifact("org.objenesis:objenesis"),
```

cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/GcpApiImpl.scala

Lines changed: 3 additions & 4 deletions
```diff
@@ -7,8 +7,7 @@ import ai.chronon.online.FlagStoreConstants
 import ai.chronon.online.GroupByServingInfoParsed
 import ai.chronon.online.KVStore
 import ai.chronon.online.LoggableResponse
-import ai.chronon.online.serde.Serde
-import ai.chronon.online.serde.AvroSerde
+import ai.chronon.online.serde.{AvroConversions, AvroSerDe, SerDe}
 import com.google.api.gax.core.{InstantiatingExecutorProvider, NoCredentialsProvider}
 import com.google.api.gax.retrying.RetrySettings
 import com.google.cloud.bigquery.BigQueryOptions
@@ -40,8 +39,8 @@ class GcpApiImpl(conf: Map[String, String]) extends Api(conf) {
   // We set the flag store to always return true for tiling enabled
   setFlagStore(tilingEnabledFlagStore)
 
-  override def streamDecoder(groupByServingInfoParsed: GroupByServingInfoParsed): Serde =
-    new AvroSerde(groupByServingInfoParsed.streamChrononSchema)
+  override def streamDecoder(groupByServingInfoParsed: GroupByServingInfoParsed): SerDe =
+    new AvroSerDe(AvroConversions.fromChrononSchema(groupByServingInfoParsed.streamChrononSchema))
 
   override def genKvStore: KVStore = {
```

cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitterTest.scala

Lines changed: 4 additions & 2 deletions
```diff
@@ -76,7 +76,8 @@ class DataprocSubmitterTest extends AnyFlatSpec with MockitoSugar {
 
   it should "test flink kafka ingest job locally" ignore {
 
-    val submitter = DataprocSubmitter()
+    val submitterConf = SubmitterConf("canary-443022", "us-central1", "zipline-canary-cluster")
+    val submitter = DataprocSubmitter(submitterConf)
     val submittedJobId =
       submitter.submit(
         spark.submission.FlinkJob,
@@ -91,7 +92,8 @@ class DataprocSubmitterTest extends AnyFlatSpec with MockitoSugar {
         List.empty,
         "--kafka-bootstrap=bootstrap.zipline-kafka-cluster.us-central1.managedkafka.canary-443022.cloud.goog:9092",
         "--kafka-topic=test-beacon-main",
-        "--data-file-name=gs://zl-warehouse/beacon_events/beacon-output.avro"
+        "--data-file-name=gs://zl-warehouse/beacon_events/beacon-output.avro",
+        "--event-delay-millis=10",
       )
     println(submittedJobId)
   }
```

flink/src/main/scala/ai/chronon/flink/FlinkJob.scala

Lines changed: 4 additions & 9 deletions
```diff
@@ -8,7 +8,7 @@ import ai.chronon.api.Extensions.GroupByOps
 import ai.chronon.api.Extensions.SourceOps
 import ai.chronon.api.ScalaJavaConversions._
 import ai.chronon.flink.FlinkJob.watermarkStrategy
-import ai.chronon.flink.SourceIdentitySchemaRegistrySchemaProvider.RegistryHostKey
+import ai.chronon.flink.deser.{DeserializationSchemaBuilder, FlinkSerDeProvider, SourceProjection}
 import ai.chronon.flink.types.AvroCodecOutput
 import ai.chronon.flink.types.TimestampedTile
 import ai.chronon.flink.types.WriteResponse
@@ -312,15 +312,10 @@ object FlinkJob {
     val topicUri = servingInfo.groupBy.streamingSource.get.topic
     val topicInfo = TopicInfo.parse(topicUri)
 
-    val schemaProvider =
-      topicInfo.params.get(RegistryHostKey) match {
-        case Some(_) => new ProjectedSchemaRegistrySchemaProvider(topicInfo.params)
-        case None =>
-          throw new IllegalArgumentException(
-            s"We only support schema registry based schema lookups. Missing $RegistryHostKey in topic config")
-      }
+    val schemaProvider = FlinkSerDeProvider.build(topicInfo)
 
-    val deserializationSchema = schemaProvider.buildDeserializationSchema(servingInfo.groupBy)
+    val deserializationSchema =
+      DeserializationSchemaBuilder.buildSourceProjectionDeserSchema(schemaProvider, servingInfo.groupBy)
     require(
       deserializationSchema.isInstanceOf[SourceProjection],
       s"Expect created deserialization schema for groupBy: $groupByName with $topicInfo to mixin SourceProjection. " +
```

flink/src/main/scala/ai/chronon/flink/KafkaFlinkSource.scala

Lines changed: 1 addition & 0 deletions
```diff
@@ -1,5 +1,6 @@
 package ai.chronon.flink
 
+import ai.chronon.flink.deser.ChrononDeserializationSchema
 import ai.chronon.online.TopicChecker
 import ai.chronon.online.TopicInfo
 import org.apache.flink.api.common.eventtime.WatermarkStrategy
```

flink/src/main/scala/ai/chronon/flink/SchemaProvider.scala

Lines changed: 0 additions & 40 deletions
This file was deleted.

flink/src/main/scala/ai/chronon/flink/SchemaRegistrySchemaProvider.scala

Lines changed: 0 additions & 111 deletions
This file was deleted.

flink/src/main/scala/ai/chronon/flink/SparkExpressionEval.scala

Lines changed: 5 additions & 0 deletions
```diff
@@ -92,6 +92,11 @@ class SparkExpressionEval[EventType](encoder: Encoder[EventType], groupBy: Group
     result
   }
 
+  def performSql(row: Array[Any]): Seq[Map[String, Any]] = {
+    val internalRow = catalystUtil.inputArrEncoder(row).asInstanceOf[InternalRow]
+    performSql(internalRow)
+  }
+
   def evaluateExpressions(inputEvent: EventType,
                           rowSerializer: ExpressionEncoder.Serializer[EventType]): Seq[Map[String, Any]] = {
     try {
```
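The new overload lets callers that already hold a decoded `Mutation` row as `Array[Any]` run the GroupBy's Spark SQL expressions without building an `InternalRow` themselves. A minimal, illustrative usage (the `eval` instance and field values below are placeholders, not from this diff):

```scala
// Assumes `eval` is a SparkExpressionEval already constructed for the GroupBy's streaming source.
val decodedRow: Array[Any] = Array("listing_123", 42L, 1715000000000L) // hypothetical decoded Mutation values
val projected: Seq[Map[String, Any]] = eval.performSql(decodedRow)
// Each Map holds one output row of the GroupBy's select expressions, keyed by column name.
```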
Lines changed: 38 additions & 0 deletions
```diff
@@ -0,0 +1,38 @@
+package ai.chronon.flink.deser
+
+import ai.chronon.api
+import ai.chronon.api.GroupBy
+import ai.chronon.online.serde.SerDe
+import org.apache.flink.api.common.serialization.AbstractDeserializationSchema
+import org.apache.spark.sql.{Encoder, Row}
+
+/** DeserializationSchema for use within Chronon. Includes details such as the source event encoder and if projection is
+  * enabled, the projected schema. This is used to both build the Flink sources as well as in the downstream processing
+  * operators (e.g. SparkExprEval).
+  *
+  * @tparam T - Type of the object returned after deserialization. Can be event type (no projection)
+  *           or Map[String, Any] (with projection)
+  */
+abstract class ChrononDeserializationSchema[T] extends AbstractDeserializationSchema[T] {
+  def sourceProjectionEnabled: Boolean
+
+  def sourceEventEncoder: Encoder[Row]
+}
+
+/** Trait that is mixed in with DeserializationSchemas that support projection pushdown. This trait provides the projected
+  * schema that the source event will be projected to.
+  */
+trait SourceProjection {
+  def projectedSchema: Array[(String, api.DataType)]
+}
+
+object DeserializationSchemaBuilder {
+  def buildSourceIdentityDeserSchema(provider: SerDe, groupBy: GroupBy): ChrononDeserializationSchema[Row] = {
+    new SourceIdentityDeserializationSchema(provider, groupBy)
+  }
+
+  def buildSourceProjectionDeserSchema(provider: SerDe,
+                                       groupBy: GroupBy): ChrononDeserializationSchema[Map[String, Any]] = {
+    new SourceProjectionDeserializationSchema(provider, groupBy)
+  }
+}
```
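This builder is what the `FlinkJob` diff above consumes. A brief usage sketch, assuming a `serDe` from `Api.streamDecoder` (or `FlinkSerDeProvider.build`) and the job's `groupBy` conf are in scope:

```scala
// Projection pushdown: deserialize straight into the projected Map[String, Any] rows.
val projectingSchema: ChrononDeserializationSchema[Map[String, Any]] =
  DeserializationSchemaBuilder.buildSourceProjectionDeserSchema(serDe, groupBy)

// Identity: keep the raw source events as Spark Rows when no projection is wanted.
val identitySchema: ChrononDeserializationSchema[org.apache.spark.sql.Row] =
  DeserializationSchemaBuilder.buildSourceIdentityDeserSchema(serDe, groupBy)
```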
Lines changed: 21 additions & 0 deletions
```diff
@@ -0,0 +1,21 @@
+package ai.chronon.flink.deser
+
+import ai.chronon.online.TopicInfo
+import ai.chronon.online.serde.SerDe
+
+// Configured in topic config in this fashion:
+// kafka://test-beacon-main/provider_class=ai.chronon.flink.deser.MockCustomSchemaProvider/schema_name=beacon
+object CustomSchemaSerDe {
+  val ProviderClass = "provider_class"
+  val SchemaName = "schema_name"
+
+  def buildCustomSchemaSerDe(topicInfo: TopicInfo): SerDe = {
+    val cl = Thread.currentThread().getContextClassLoader // Use Flink's classloader
+    val providerClass =
+      topicInfo.params.getOrElse(ProviderClass, throw new IllegalArgumentException(s"$ProviderClass not set"))
+    val cls = cl.loadClass(providerClass)
+    val constructor = cls.getConstructors.apply(0)
+    val provider = constructor.newInstance(topicInfo)
+    provider.asInstanceOf[SerDe]
+  }
+}
```
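The loader above instantiates the configured class reflectively, passing the `TopicInfo` as the single constructor argument and casting the result to `SerDe`. A user-supplied provider (named after the `MockCustomSchemaProvider` referenced in the comment) would therefore take roughly this shape; the `schema`/`fromBytes` members and the `Mutation` import path are assumptions about the `SerDe` interface, which isn't printed on this page:

```scala
package com.example

import ai.chronon.api
import ai.chronon.flink.deser.CustomSchemaSerDe
import ai.chronon.online.TopicInfo
import ai.chronon.online.serde.{Mutation, SerDe} // Mutation's package is assumed here

// Hypothetical custom provider, wired up via:
// kafka://my-topic/provider_class=com.example.MyCustomSchemaProvider/schema_name=beacon
// The single-TopicInfo-arg constructor is the only requirement visible in CustomSchemaSerDe.
class MyCustomSchemaProvider(topicInfo: TopicInfo) extends SerDe {
  private val schemaName = topicInfo.params.getOrElse(CustomSchemaSerDe.SchemaName, "beacon")

  // Assumed SerDe members: the Chronon schema of the payload and the byte -> Mutation decoder.
  override def schema: api.StructType = ???
  override def fromBytes(bytes: Array[Byte]): Mutation = ???
}
```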
