Skip to content

Commit 6d8afd0

Browse files
authored
fix: remove references to custom json, float essential apis to top (#492)
## Summary custom json is not supposed to be relied on in scala code at all - it is purely meant for users to attach THEIR metadata to objects. ## Checklist - [x] Added Unit Tests - [x] Covered by existing CI - [ ] Integration tested - [ ] Documentation update <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced new configuration flags that improve control over tiling behavior and streaming error handling. - Expanded metadata support to include additional output partition columns, enhancing data staging and query processing. - **Documentation** - Updated instructions for enabling tiled reads to reflect the new configuration parameter. - **Tests** - Enhanced test coverage to validate the new serving and tiling configurations. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent fa29dd4 commit 6d8afd0

File tree

17 files changed

+229
-128
lines changed

17 files changed

+229
-128
lines changed

api/py/ai/chronon/cli/compile/parse_teams.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def _merge_mode_maps(
123123
result.backfill = _merge_maps(result.common, result.backfill)
124124
result.upload = _merge_maps(result.common, result.upload)
125125
result.streaming = _merge_maps(result.common, result.streaming)
126+
result.serving = _merge_maps(result.common, result.serving)
126127
result.common = None
127128
continue
128129

@@ -135,5 +136,6 @@ def _merge_mode_maps(
135136
result.streaming = _merge_maps(
136137
result.streaming, mode_map.common, mode_map.streaming
137138
)
139+
result.serving = _merge_maps(result.serving, mode_map.common, mode_map.serving)
138140

139141
return result

api/src/main/scala/ai/chronon/api/Builders.scala

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -264,14 +264,15 @@ object Builders {
264264
online: Boolean = false,
265265
production: Boolean = false,
266266
customJson: String = null,
267-
dependencies: Seq[String] = null,
268267
namespace: String = null,
269268
team: String = null,
270269
samplePercent: Double = 100,
271270
consistencySamplePercent: Double = 5,
272271
tableProperties: Map[String, String] = Map.empty,
273272
historicalBackfill: Boolean = true,
274-
driftSpec: DriftSpec = null
273+
driftSpec: DriftSpec = null,
274+
additionalOutputPartitionColumns: Seq[String] = Seq.empty,
275+
executionInfo: ExecutionInfo = null
275276
): MetaData = {
276277
val result = new MetaData()
277278
result.setName(name)
@@ -287,9 +288,7 @@ object Builders {
287288
}
288289

289290
result.setTeam(effectiveTeam)
290-
val executionInfo = new ExecutionInfo()
291-
.setHistoricalBackfill(historicalBackfill)
292-
result.setExecutionInfo(executionInfo)
291+
293292
if (samplePercent > 0)
294293
result.setSamplePercent(samplePercent)
295294
if (consistencySamplePercent > 0)
@@ -298,6 +297,19 @@ object Builders {
298297
result.setTableProperties(tableProperties.toJava)
299298
if (driftSpec != null)
300299
result.setDriftSpec(driftSpec)
300+
301+
if (executionInfo != null) {
302+
result.setExecutionInfo(executionInfo.setHistoricalBackfill(historicalBackfill))
303+
} else {
304+
result.setExecutionInfo(
305+
new ExecutionInfo()
306+
.setHistoricalBackfill(historicalBackfill))
307+
}
308+
309+
if (additionalOutputPartitionColumns.nonEmpty) {
310+
result.setAdditionalOutputPartitionColumns(additionalOutputPartitionColumns.toJava)
311+
}
312+
301313
result
302314
}
303315
}

api/src/main/scala/ai/chronon/api/Extensions.scala

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -154,20 +154,6 @@ object Extensions {
154154
@deprecated("Use `name` instead.")
155155
def nameToFilePath: String = metaData.name.replaceFirst("\\.", "/")
156156

157-
// helper function to extract values from customJson
158-
def customJsonLookUp(key: String): Any = {
159-
if (metaData.customJson == null) return null
160-
val mapper = new ObjectMapper()
161-
val typeRef = new TypeReference[java.util.HashMap[String, Object]]() {}
162-
val jMap: java.util.Map[String, Object] = mapper.readValue(metaData.customJson, typeRef)
163-
jMap.toScala.get(key).orNull
164-
}
165-
166-
def owningTeam: String = {
167-
val teamOverride = Try(customJsonLookUp(Constants.TeamOverride).asInstanceOf[String]).toOption
168-
teamOverride.getOrElse(metaData.team)
169-
}
170-
171157
// if drift spec is set but tile size is not set, default to 30 minutes
172158
def driftTileSize: Option[Window] = {
173159
Option(metaData.getDriftSpec) match {
@@ -460,13 +446,6 @@ object Extensions {
460446
}
461447
}
462448

463-
// Check if tiling is enabled for a given GroupBy. Defaults to false if the 'enable_tiling' flag isn't set.
464-
def isTilingEnabled: Boolean =
465-
groupBy.getMetaData.customJsonLookUp("enable_tiling") match {
466-
case s: Boolean => s
467-
case _ => false
468-
}
469-
470449
def semanticHash: String = {
471450
val newGroupBy = groupBy.deepCopy()
472451
newGroupBy.unsetMetaData()
@@ -600,6 +579,22 @@ object Extensions {
600579
QueryParts(allSelects, wheres)
601580
}
602581

582+
def servingFlagValue(flag: String): Option[String] = {
583+
for (
584+
execInfo <- Option(groupBy.metaData.executionInfo);
585+
conf <- Option(execInfo.conf);
586+
servingConf <- Option(conf.serving);
587+
value <- Option(servingConf.get(flag))
588+
) {
589+
return Some(value)
590+
}
591+
None
592+
}
593+
594+
def tilingFlag: Boolean = servingFlagValue("tiling").exists(_.toLowerCase() == "true")
595+
596+
def dontThrowOnDecodeFailFlag: Boolean = servingFlagValue("decode.throw_on_fail").exists(_.toLowerCase() == "false")
597+
603598
// build left streaming query for join source runner
604599
def buildLeftStreamingQuery(query: Query, defaultFieldNames: Seq[String]): String = {
605600
val queryParts = groupBy.buildQueryParts(query)

api/src/test/scala/ai/chronon/api/test/ExtensionsTest.scala

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,11 @@
1616

1717
package ai.chronon.api.test
1818

19-
import ai.chronon.api.Accuracy
20-
import ai.chronon.api.Builders
21-
import ai.chronon.api.Constants
2219
import ai.chronon.api.Extensions._
23-
import ai.chronon.api.GroupBy
2420
import ai.chronon.api.ScalaJavaConversions._
25-
import org.junit.Assert.assertEquals
26-
import org.junit.Assert.assertFalse
27-
import org.junit.Assert.assertTrue
28-
import org.mockito.Mockito.spy
29-
import org.mockito.Mockito.when
21+
import ai.chronon.api.{Accuracy, Builders, ConfigProperties, Constants, ExecutionInfo, GroupBy}
22+
import org.junit.Assert.{assertEquals, assertFalse, assertTrue}
23+
import org.mockito.Mockito.{spy, when}
3024
import org.scalatest.flatspec.AnyFlatSpec
3125

3226
import java.util.Arrays
@@ -41,24 +35,6 @@ class ExtensionsTest extends AnyFlatSpec {
4135
)
4236
}
4337

44-
it should "owning team" in {
45-
val metadata =
46-
Builders.MetaData(
47-
customJson = "{\"check_consistency\": true, \"lag\": 0, \"team_override\": \"ml_infra\"}",
48-
team = "chronon"
49-
)
50-
51-
assertEquals(
52-
"ml_infra",
53-
metadata.owningTeam
54-
)
55-
56-
assertEquals(
57-
"chronon",
58-
metadata.team
59-
)
60-
}
61-
6238
it should "row identifier" in {
6339
val labelPart = Builders.LabelPart();
6440
val res = labelPart.rowIdentifier(Arrays.asList("yoyo", "yujia"), "ds")
@@ -142,17 +118,29 @@ class ExtensionsTest extends AnyFlatSpec {
142118
}
143119

144120
it should "is tiling enabled" in {
145-
def buildGroupByWithCustomJson(customJson: String = null): GroupBy =
121+
def buildGroupByWithServingFlags(flags: Map[String, String] = null): GroupByOps = {
122+
123+
val execInfo: ExecutionInfo = if (flags != null) {
124+
new ExecutionInfo()
125+
.setConf(new ConfigProperties().setServing(flags.toJava))
126+
} else {
127+
null
128+
}
129+
146130
Builders.GroupBy(
147-
metaData = Builders.MetaData(name = "featureGroupName", customJson = customJson)
131+
metaData = Builders.MetaData(name = "featureGroupName", executionInfo = execInfo)
148132
)
149133

134+
}
135+
150136
// serving flags not set defaults to false
151-
assertFalse(buildGroupByWithCustomJson().isTilingEnabled)
152-
assertFalse(buildGroupByWithCustomJson("{}").isTilingEnabled)
137+
assertFalse(buildGroupByWithServingFlags().tilingFlag)
138+
assertFalse(buildGroupByWithServingFlags(Map.empty).tilingFlag)
139+
140+
val trueGb = buildGroupByWithServingFlags(Map("tiling" -> "true"))
141+
assertTrue(trueGb.tilingFlag)
142+
assertFalse(buildGroupByWithServingFlags(Map("tiling" -> "false")).tilingFlag)
143+
assertFalse(buildGroupByWithServingFlags(Map("tiling" -> "invalid")).tilingFlag)
153144

154-
assertTrue(buildGroupByWithCustomJson("{\"enable_tiling\": true}").isTilingEnabled)
155-
assertFalse(buildGroupByWithCustomJson("{\"enable_tiling\": false}").isTilingEnabled)
156-
assertFalse(buildGroupByWithCustomJson("{\"enable_tiling\": \"string instead of bool\"}").isTilingEnabled)
157145
}
158146
}

api/thrift/api.thrift

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,13 +249,19 @@ struct MetaData {
249249

250250
4: optional string outputNamespace
251251

252-
5: optional map<string, string> tableProperties
252+
/**
253+
* By default we will just partition the output by the date column - set via "spark.chronon.partition.column"
254+
* With this we will partition the output with the specified additional columns
255+
**/
256+
5: optional list<string> additionalOutputPartitionColumns
257+
258+
6: optional map<string, string> tableProperties
253259

254260
// tag_key -> tag_value - tags allow for repository wide querying, deprecations etc
255261
// this is object level tag - applies to all columns produced by the object - GroupBy, Join, Model etc
256-
6: optional map<string, string> tags
262+
20: optional map<string, string> tags
257263
// column -> tag_key -> tag_value
258-
7: optional map<string, map<string, string>> columnTags
264+
21: optional map<string, map<string, string>> columnTags
259265

260266
// marking this as true means that the conf can be served online
261267
// once marked online, a conf cannot be changed - compiling the conf won't be allowed
@@ -286,8 +292,6 @@ struct MetaData {
286292
204: optional common.ExecutionInfo executionInfo
287293
}
288294

289-
290-
291295
// Equivalent to a FeatureSet in chronon terms
292296
struct GroupBy {
293297
1: optional MetaData metaData

api/thrift/common.thrift

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,42 @@ struct DateRange {
2525
2: string endDate
2626
}
2727

28+
/**
29+
* env vars for different modes of execution - with "common" applying to all modes
30+
* the submitter will set these env vars prior to launching the job
31+
*
32+
* these env vars are layered in order of priority
33+
* 1. company file defaults specified in teams.py - in the "common" team
34+
* 2. team wide defaults that apply to all objects in the team folder
35+
 * 3. object specific defaults - applies only to the objects that declare them
36+
*
37+
* All the maps from the above three places are merged to create final env var
38+
**/
2839
struct EnvironmentVariables {
2940
1: optional map<string, string> common
3041
2: optional map<string, string> backfill
3142
3: optional map<string, string> upload
3243
4: optional map<string, string> streaming
44+
5: optional map<string, string> serving
3345
}
3446

47+
/**
48+
* job config for different modes of execution - with "common" applying to all modes
49+
* usually these are spark or flink conf params like "spark.executor.memory" etc
50+
*
51+
* these confs are layered in order of priority
52+
* 1. company file defaults specified in teams.py - in the "common" team
53+
* 2. team wide defaults that apply to all objects in the team folder
54+
 * 3. object specific defaults - applies only to the objects that declare them
55+
*
56+
* All the maps from the above three places are merged to create final conf map
57+
**/
3558
struct ConfigProperties {
3659
1: optional map<string, string> common
3760
2: optional map<string, string> backfill
3861
3: optional map<string, string> upload
3962
4: optional map<string, string> streaming
63+
5: optional map<string, string> serving
4064
}
4165

4266
struct TableDependency {

docs/source/Tiled_Architecture.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,4 @@ the [Chronon on Flink documentation](setup/Flink.md) for instructions. As part o
8080
modify your KV store implementation to know how to write and fetch tiles.
8181

8282
Once the Flink app is set up and writing tiles to your datastore, the final step is to enable tiled reads in the
83-
Fetcher. Just add `enable_tiling=true` to
84-
the [customJson](https://github.com/airbnb/chronon/blob/48b789dd2c216c62bbf1d74fbf4e779f23db541f/api/py/ai/chronon/group_by.py#L561)
85-
of any GroupBy definition.
83+
Fetcher. Just add `tiling=true` to `metaData.executionInfo.conf.serving` of any GroupBy definition.

flink/src/main/scala/ai/chronon/flink/window/FlinkRowAggregators.scala

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import ai.chronon.api.Constants
55
import ai.chronon.api.DataType
66
import ai.chronon.api.GroupBy
77
import ai.chronon.api.Row
8-
import ai.chronon.api.ScalaJavaConversions.ListOps
8+
import ai.chronon.api.ScalaJavaConversions.{IteratorOps, ListOps}
99
import ai.chronon.flink.types.TimestampedIR
1010
import ai.chronon.flink.types.TimestampedTile
1111
import ai.chronon.online.TileCodec
@@ -42,6 +42,23 @@ class FlinkRowAggregationFunction(
4242
private val valueColumns: Array[String] = inputSchema.map(_._1).toArray // column order matters
4343
private val timeColumnAlias: String = Constants.TimeColumn
4444

45+
private val isMutation: Boolean = {
46+
Option(groupBy.getSources).exists(
47+
_.iterator().toScala
48+
.exists(source => source.isSetEntities && source.getEntities.isSetMutationTopic)
49+
)
50+
}
51+
52+
private val reversalIndex = {
53+
val result = inputSchema.indexWhere(_._1 == Constants.ReversalColumn)
54+
55+
if (isMutation)
56+
require(result >= 0,
57+
s"Please specify source.query.reversal_column for CDC sources, only found, ${inputSchema.map(_._1)}")
58+
59+
result
60+
}
61+
4562
/*
4663
* Initialize the transient rowAggregator.
4764
* Running this method is an idempotent operation:
@@ -60,6 +77,7 @@ class FlinkRowAggregationFunction(
6077
element: Map[String, Any],
6178
accumulatorIr: TimestampedIR
6279
): TimestampedIR = {
80+
6381
// Most times, the time column is a Long, but it could be a Double.
6482
val tsMills = Try(element(timeColumnAlias).asInstanceOf[Long])
6583
.getOrElse(element(timeColumnAlias).asInstanceOf[Double].toLong)
@@ -79,7 +97,14 @@ class FlinkRowAggregationFunction(
7997
)
8098

8199
val partialAggregates = Try {
82-
rowAggregator.update(accumulatorIr.ir, row)
100+
val isDelete = isMutation && row.getAs[Boolean](reversalIndex)
101+
102+
if (isDelete) {
103+
rowAggregator.delete(accumulatorIr.ir, row)
104+
} else {
105+
rowAggregator.update(accumulatorIr.ir, row)
106+
}
107+
83108
}
84109

85110
partialAggregates match {

online/src/main/scala/ai/chronon/online/GroupByServingInfoParsed.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ class GroupByServingInfoParsed(val groupByServingInfo: GroupByServingInfo, parti
8989
// Start tiling specific variables
9090

9191
lazy val tiledCodec: TileCodec = new TileCodec(groupBy, valueChrononSchema.fields.map(sf => (sf.name, sf.fieldType)))
92-
lazy val isTilingEnabled: Boolean = groupByOps.isTilingEnabled
9392

9493
// End tiling specific variables
9594

online/src/main/scala/ai/chronon/online/MetadataEndPoint.scala

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,12 @@ object MetadataEndPoint {
2525
val ConfByKeyEndPointName = "CHRONON_METADATA"
2626
val NameByTeamEndPointName = "CHRONON_ENTITY_BY_TEAM"
2727

28-
private def getTeamFromMetadata(metaData: MetaData): String = {
29-
val team = metaData.team
30-
if (metaData.customJson != null && metaData.customJson.nonEmpty) {
31-
implicit val formats = DefaultFormats
32-
val customJson = parse(metaData.customJson)
33-
val teamFromJson: String = (customJson \ "team_override").extractOpt[String].getOrElse("")
34-
if (teamFromJson.nonEmpty) teamFromJson else team
35-
} else team
36-
}
37-
3828
private def parseTeam[Conf <: TBase[_, _]: Manifest: ClassTag](conf: Conf): String = {
3929
conf match {
40-
case join: Join => "joins/" + getTeamFromMetadata(join.metaData)
41-
case groupBy: GroupBy => "group_bys/" + getTeamFromMetadata(groupBy.metaData)
42-
case stagingQuery: StagingQuery => "staging_queries/" + getTeamFromMetadata(stagingQuery.metaData)
43-
case model: Model => "models/" + getTeamFromMetadata(model.metaData)
30+
case join: Join => "joins/" + join.metaData.team
31+
case groupBy: GroupBy => "group_bys/" + groupBy.metaData.team
32+
case stagingQuery: StagingQuery => "staging_queries/" + stagingQuery.metaData.team
33+
case model: Model => "models/" + model.metaData.team
4434
case _ =>
4535
logger.error(s"Failed to parse team from $conf")
4636
throw new Exception(s"Failed to parse team from $conf")

0 commit comments

Comments
 (0)