Commit fc3255b

Merge branch 'main' into davidhan/add_job_status
2 parents: ae9cdcd + 3e74ec6

22 files changed, +303 -401 lines

.github/workflows/test_scala_2_12_non_spark.yaml

Lines changed: 9 additions & 0 deletions
@@ -12,6 +12,7 @@ on:
       - 'hub/**'
       - 'orchestration/**'
       - 'service/**'
+      - 'service_commons/**'
       - 'cloud_aws/**'
       - 'cloud_gcp/**'
       - '.github/workflows/test_scala_2_12_non_spark.yaml'
@@ -26,6 +27,7 @@ on:
       - 'hub/**'
       - 'orchestration/**'
       - 'service/**'
+      - 'service_commons/**'
       - 'cloud_aws/**'
       - 'cloud_gcp/**'
       - '.github/workflows/test_scala_2_12_non_spark.yaml'
@@ -96,6 +98,13 @@ jobs:
             --google_credentials=bazel-cache-key.json \
             //service:tests

+      - name: Run service_commons tests
+        run: |
+          bazel test \
+            --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
+            --google_credentials=bazel-cache-key.json \
+            //service_commons:tests
+
       - name: Run orchestrator tests
         run: |
           bazel test \

.github/workflows/test_scala_2_12_spark.yaml

Lines changed: 25 additions & 0 deletions
@@ -44,6 +44,31 @@ jobs:
             --google_credentials=bazel-cache-key.json \
             --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \
             //spark:tests
+  batch_tests:
+    runs-on: ubuntu-8_cores-32_gb
+    container:
+      image: ghcr.io/${{ github.repository }}-ci:latest
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+    defaults:
+      run:
+        working-directory: ${{ github.workspace }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Bazel cache credentials
+        run: |
+          echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
+
+      - name: Run Batch tests
+        run: |
+          bazel test \
+            --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
+            --google_credentials=bazel-cache-key.json \
+            --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \
+            //spark:batch_test

   fetcher_tests:
     runs-on: ubuntu-8_cores-32_gb
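
Splitting //spark:batch_test out of the main Spark suite into its own batch_tests job mirrors the existing fetcher_tests job below it: each target gets a dedicated 8-core runner, so the batch tests run in parallel with //spark:tests rather than serially inside one job, presumably to keep CI wall-clock time down as the Spark suite grows.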

api/python/ai/chronon/repo/gcp.py

Lines changed: 5 additions & 5 deletions
@@ -198,8 +198,8 @@ def download_zipline_dataproc_jar(
     def generate_dataproc_submitter_args(
         self,
         user_args: str,
+        version: str,
         job_type: JobType = JobType.SPARK,
-        version: str = "latest",
         local_files_to_upload: List[str] = None,
     ):
         customer_warehouse_bucket_name = f"zipline-warehouse-{get_customer_id()}"
@@ -287,8 +287,8 @@ def run_dataproc_flink_streaming(self):

         dataproc_args = self.generate_dataproc_submitter_args(
             job_type=JobType.FLINK,
+            version=self._args["version"],
             user_args=" ".join([user_args_str, flag_args_str]),
-            version=self._args.get("version", "latest"),
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         return command
@@ -326,7 +326,7 @@ def run(self):
         dataproc_args = self.generate_dataproc_submitter_args(
             # for now, self.conf is the only local file that requires uploading to gcs
             user_args=args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         command_list.append(command)
@@ -371,7 +371,7 @@ def run(self):
             local_files_to_upload=local_files_to_upload_to_gcs,
             # for now, self.conf is the only local file that requires uploading to gcs
             user_args=user_args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = (
             f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
@@ -399,7 +399,7 @@ def run(self):
             # for now, self.conf is the only local file that requires uploading to gcs
             local_files_to_upload=local_files_to_upload_to_gcs,
             user_args=user_args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         command_list.append(command)
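
Taken together, these edits change version from an optional parameter with a "latest" fallback into a required one: generate_dataproc_submitter_args no longer defaults it, and every call site reads self._args["version"], which raises KeyError when the key is absent instead of silently substituting "latest". A minimal sketch of the behavioral difference, using a plain dict in place of the real self._args:

# Sketch only: `args` stands in for the parsed CLI options (self._args).
args = {"mode": "backfill"}  # "version" was never set

# Old lookup: silently falls back, masking a missing --version flag.
old_version = args.get("version", "latest")  # -> "latest"

# New lookup: fails fast so the caller must supply a resolved version.
try:
    new_version = args["version"]
except KeyError:
    raise SystemExit("version was never resolved upstream")

The fallback is not gone, it just moves upstream: run.py (next file) now resolves a concrete default before these call sites execute.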

api/python/ai/chronon/repo/run.py

Lines changed: 13 additions & 2 deletions
@@ -19,6 +19,8 @@

 import os
 from datetime import datetime
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as ver

 import click

@@ -48,9 +50,10 @@


 def set_defaults(ctx):
-    """Set default values based on environment"""
+    """Set default values based on environment."""
     chronon_repo_path = os.environ.get("CHRONON_REPO_PATH", ".")
     today = datetime.today().strftime("%Y-%m-%d")
+
     defaults = {
         "mode": "backfill",
         "dataproc": False,
@@ -80,6 +83,14 @@ def set_defaults(ctx):
             ctx.params[key] = value


+def _set_package_version():
+    try:
+        package_version = ver("zipline-ai")
+    except PackageNotFoundError:
+        print("No package found. Continuing with the latest version.")
+        package_version = "latest"
+    return package_version
+
 @click.command(
     name="run",
     context_settings=dict(allow_extra_args=True, ignore_unknown_options=True),
@@ -119,7 +130,7 @@ def set_defaults(ctx):
     "--online-class",
     help="Class name of Online Impl. Used for streaming and metadata-upload mode.",
 )
-@click.option("--version", default="latest", help="Chronon version to use.")
+@click.option("--version", default=_set_package_version, help="Chronon version to use.")
 @click.option(
     "--spark-version", default="2.4.0", help="Spark version to use for downloading jar."
 )
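
click accepts a callable as an option's default and invokes it only when the flag is omitted, so _set_package_version runs lazily at parse time rather than at import time. A self-contained sketch of the same pattern; the demo command and the example-pkg distribution name are illustrative placeholders, not names from this repo:

# Sketch: deriving a CLI option's default from installed package metadata.
from importlib.metadata import PackageNotFoundError, version

import click


def _default_version():
    try:
        # Returns the installed distribution's version string, e.g. "0.3.1".
        return version("example-pkg")  # placeholder distribution name
    except PackageNotFoundError:
        # e.g. running from a source checkout with nothing installed
        return "latest"


@click.command()
@click.option("--version", "pkg_version", default=_default_version,
              help="Version to use (defaults to the installed package).")
def demo(pkg_version):
    click.echo(f"using version {pkg_version}")


if __name__ == "__main__":
    demo()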

api/thrift/api.thrift

Lines changed: 0 additions & 4 deletions
@@ -264,10 +264,6 @@ struct MetaData {
     // column -> tag_key -> tag_value
     21: optional map<string, map<string, string>> columnTags

-    // A stage is a "sub-transformation" of a given node. For example a `GroupBy` can consist of selects (with SQL expressions), filters (in the form of where clauses), followed by aggregations defined in the Zipline DSL.
-    // Each of this is a `stage` with its own column level lineage.
-    8: optional list<lineage.StageWithLineage> stagesWithLineage
-
     // marking this as true means that the conf can be served online
     // once marked online, a conf cannot be changed - compiling the conf won't be allowed
     100: optional bool online

api/thrift/lineage.thrift

Lines changed: 0 additions & 52 deletions
This file was deleted.
