Commit fc3255b

Merge branch 'main' into davidhan/add_job_status
2 parents: ae9cdcd + 3e74ec6

22 files changed, +303 -401 lines

.github/workflows/test_scala_2_12_non_spark.yaml

Lines changed: 9 additions & 0 deletions
@@ -12,6 +12,7 @@ on:
       - 'hub/**'
       - 'orchestration/**'
       - 'service/**'
+      - 'service_commons/**'
       - 'cloud_aws/**'
       - 'cloud_gcp/**'
       - '.github/workflows/test_scala_2_12_non_spark.yaml'
@@ -26,6 +27,7 @@ on:
       - 'hub/**'
       - 'orchestration/**'
       - 'service/**'
+      - 'service_commons/**'
       - 'cloud_aws/**'
       - 'cloud_gcp/**'
       - '.github/workflows/test_scala_2_12_non_spark.yaml'
@@ -96,6 +98,13 @@ jobs:
             --google_credentials=bazel-cache-key.json \
             //service:tests

+      - name: Run service_commons tests
+        run: |
+          bazel test \
+            --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
+            --google_credentials=bazel-cache-key.json \
+            //service_commons:tests
+
       - name: Run orchestrator tests
         run: |
           bazel test \

.github/workflows/test_scala_2_12_spark.yaml

Lines changed: 25 additions & 0 deletions
@@ -44,6 +44,31 @@ jobs:
             --google_credentials=bazel-cache-key.json \
             --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \
             //spark:tests
+  batch_tests:
+    runs-on: ubuntu-8_cores-32_gb
+    container:
+      image: ghcr.io/${{ github.repository }}-ci:latest
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+    defaults:
+      run:
+        working-directory: ${{ github.workspace }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Bazel cache credentials
+        run: |
+          echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json
+
+      - name: Run Batch tests
+        run: |
+          bazel test \
+            --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \
+            --google_credentials=bazel-cache-key.json \
+            --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \
+            //spark:batch_test

   fetcher_tests:
     runs-on: ubuntu-8_cores-32_gb
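
Splitting //spark:batch_test out of the main Spark suite into its own batch_tests job mirrors the existing fetcher_tests job below it: each target gets a dedicated 8-core runner, so the batch tests run in parallel with //spark:tests rather than serially inside one job, presumably to keep CI wall-clock time down as the Spark suite grows.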

api/python/ai/chronon/repo/gcp.py

Lines changed: 5 additions & 5 deletions
@@ -198,8 +198,8 @@ def download_zipline_dataproc_jar(
     def generate_dataproc_submitter_args(
         self,
         user_args: str,
+        version: str,
         job_type: JobType = JobType.SPARK,
-        version: str = "latest",
         local_files_to_upload: List[str] = None,
     ):
         customer_warehouse_bucket_name = f"zipline-warehouse-{get_customer_id()}"
@@ -287,8 +287,8 @@ def run_dataproc_flink_streaming(self):

         dataproc_args = self.generate_dataproc_submitter_args(
             job_type=JobType.FLINK,
+            version=self._args["version"],
             user_args=" ".join([user_args_str, flag_args_str]),
-            version=self._args.get("version", "latest"),
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         return command
@@ -326,7 +326,7 @@ def run(self):
         dataproc_args = self.generate_dataproc_submitter_args(
             # for now, self.conf is the only local file that requires uploading to gcs
             user_args=args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         command_list.append(command)
@@ -371,7 +371,7 @@ def run(self):
             local_files_to_upload=local_files_to_upload_to_gcs,
             # for now, self.conf is the only local file that requires uploading to gcs
             user_args=user_args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = (
             f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
@@ -399,7 +399,7 @@ def run(self):
             # for now, self.conf is the only local file that requires uploading to gcs
             local_files_to_upload=local_files_to_upload_to_gcs,
             user_args=user_args,
-            version=self._args.get("version", "latest"),
+            version=self._args["version"],
         )
         command = f"java -cp {self.jar_path} {DATAPROC_ENTRY} {dataproc_args}"
         command_list.append(command)
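
Taken together, these edits change version from an optional parameter with a "latest" fallback into a required one: generate_dataproc_submitter_args no longer defaults it, and every call site reads self._args["version"], which raises KeyError when the key is absent instead of silently substituting "latest". A minimal sketch of the behavioral difference, using a plain dict in place of the real self._args:

# Sketch only: `args` stands in for the parsed CLI options (self._args).
args = {"mode": "backfill"}  # "version" was never set

# Old lookup: silently falls back, masking a missing --version flag.
old_version = args.get("version", "latest")  # -> "latest"

# New lookup: fails fast so the caller must supply a resolved version.
try:
    new_version = args["version"]
except KeyError:
    raise SystemExit("version was never resolved upstream")

The fallback is not gone, it just moves upstream: run.py (next file) now resolves a concrete default before these call sites execute.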

api/python/ai/chronon/repo/run.py

Lines changed: 13 additions & 2 deletions
@@ -19,6 +19,8 @@

 import os
 from datetime import datetime
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as ver

 import click

@@ -48,9 +50,10 @@


 def set_defaults(ctx):
-    """Set default values based on environment"""
+    """Set default values based on environment."""
     chronon_repo_path = os.environ.get("CHRONON_REPO_PATH", ".")
     today = datetime.today().strftime("%Y-%m-%d")
+
     defaults = {
         "mode": "backfill",
         "dataproc": False,
@@ -80,6 +83,14 @@ def set_defaults(ctx):
             ctx.params[key] = value


+def _set_package_version():
+    try:
+        package_version = ver("zipline-ai")
+    except PackageNotFoundError:
+        print("No package found. Continuing with the latest version.")
+        package_version = "latest"
+    return package_version
+
 @click.command(
     name="run",
     context_settings=dict(allow_extra_args=True, ignore_unknown_options=True),
@@ -119,7 +130,7 @@ def set_defaults(ctx):
     "--online-class",
     help="Class name of Online Impl. Used for streaming and metadata-upload mode.",
 )
-@click.option("--version", default="latest", help="Chronon version to use.")
+@click.option("--version", default=_set_package_version, help="Chronon version to use.")
 @click.option(
     "--spark-version", default="2.4.0", help="Spark version to use for downloading jar."
 )
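
click accepts a callable as an option's default and invokes it only when the flag is omitted, so _set_package_version runs lazily at parse time rather than at import time. A self-contained sketch of the same pattern; the demo command and the example-pkg distribution name are illustrative placeholders, not names from this repo:

# Sketch: deriving a CLI option's default from installed package metadata.
from importlib.metadata import PackageNotFoundError, version

import click


def _default_version():
    try:
        # Returns the installed distribution's version string, e.g. "0.3.1".
        return version("example-pkg")  # placeholder distribution name
    except PackageNotFoundError:
        # e.g. running from a source checkout with nothing installed
        return "latest"


@click.command()
@click.option("--version", "pkg_version", default=_default_version,
              help="Version to use (defaults to the installed package).")
def demo(pkg_version):
    click.echo(f"using version {pkg_version}")


if __name__ == "__main__":
    demo()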

api/thrift/api.thrift

Lines changed: 0 additions & 4 deletions
@@ -264,10 +264,6 @@ struct MetaData {
     // column -> tag_key -> tag_value
     21: optional map<string, map<string, string>> columnTags

-    // A stage is a "sub-transformation" of a given node. For example a `GroupBy` can consist of selects (with SQL expressions), filters (in the form of where clauses), followed by aggregations defined in the Zipline DSL.
-    // Each of this is a `stage` with its own column level lineage.
-    8: optional list<lineage.StageWithLineage> stagesWithLineage
-
     // marking this as true means that the conf can be served online
     // once marked online, a conf cannot be changed - compiling the conf won't be allowed
     100: optional bool online

api/thrift/lineage.thrift

Lines changed: 0 additions & 52 deletions
This file was deleted.
