|
# Default online-API implementation class used on GCP deployments.
ZIPLINE_ONLINE_CLASS_DEFAULT = "ai.chronon.integrations.cloud_gcp.GcpApiImpl"

# Artifact names resolved under jars/ in the customer's GCS artifact bucket.
ZIPLINE_FLINK_JAR_DEFAULT = "flink-assembly-0.1.0-SNAPSHOT.jar"
ZIPLINE_DATAPROC_SUBMITTER_JAR = "cloud_gcp_submitter_deploy.jar"
ZIPLINE_SERVICE_JAR = "service-0.1.0-SNAPSHOT.jar"

# Local scratch directory where downloaded jars are cached.
ZIPLINE_DIRECTORY = "/tmp/zipline"
|
140 | 141 |
|
@@ -864,78 +865,27 @@ def generate_dataproc_submitter_args(user_args: str, job_type: DataprocJobType =
|
864 | 865 | raise ValueError(f"Invalid job type: {job_type}")
|
865 | 866 |
|
866 | 867 |
|
867 |
def download_zipline_jar(destination_dir: str, customer_id: str, jar_name: str):
    """Ensure a local copy of ``jar_name`` from the customer's artifact bucket.

    Looks up ``jars/<jar_name>`` in the ``zipline-artifacts-<customer_id>``
    GCS bucket. If a local copy already exists and its hash matches the GCS
    object, the download is skipped; otherwise the blob is fetched.

    Args:
        destination_dir: Local directory the jar is written into.
        customer_id: Customer identifier used to derive the bucket name.
        jar_name: File name of the jar artifact to fetch.

    Returns:
        The local path ``<destination_dir>/<jar_name>``.
    """
    bucket_name = f"zipline-artifacts-{customer_id}"
    source_blob_name = f"jars/{jar_name}"
    destination_path = f"{destination_dir}/{jar_name}"

    # Short-circuit: only compute hashes when a local copy is present.
    up_to_date = os.path.exists(destination_path) and compare_gcs_and_local_file_hashes(
        bucket_name, source_blob_name, destination_path)

    if up_to_date:
        print(f"{destination_path} matches GCS {bucket_name}/{source_blob_name}")
    else:
        print(f"{destination_path} does NOT match GCS {bucket_name}/{source_blob_name}")
        print(f"Downloading {jar_name} from GCS...")
        download_gcs_blob(bucket_name, source_blob_name,
                          destination_path)
    return destination_path
939 | 889 |
|
940 | 890 |
|
941 | 891 | @retry_decorator(retries=2, backoff=5)
|
@@ -1085,12 +1035,12 @@ def main(ctx, conf, env, mode, dataproc, ds, app_name, start_ds, end_ds, paralle
|
1085 | 1035 | os.makedirs(ZIPLINE_DIRECTORY, exist_ok=True)
|
1086 | 1036 |
|
1087 | 1037 | if dataproc:
|
1088 |
| - jar_path = download_dataproc_submitter_jar(ZIPLINE_DIRECTORY, get_customer_id()) |
| 1038 | + jar_path = download_zipline_jar(ZIPLINE_DIRECTORY, get_customer_id(), ZIPLINE_DATAPROC_SUBMITTER_JAR) |
1089 | 1039 | elif chronon_jar:
|
1090 | 1040 | jar_path = chronon_jar
|
1091 | 1041 | else:
|
1092 |
| - service_jar_path = download_service_jar(ZIPLINE_DIRECTORY, get_customer_id()) |
1093 |
| - chronon_gcp_jar_path = download_chronon_gcp_jar(ZIPLINE_DIRECTORY, get_customer_id()) |
| 1042 | + service_jar_path = download_zipline_jar(ZIPLINE_DIRECTORY, get_customer_id(), ZIPLINE_SERVICE_JAR) |
| 1043 | + chronon_gcp_jar_path = download_zipline_jar(ZIPLINE_DIRECTORY, get_customer_id(), ZIPLINE_ONLINE_JAR_DEFAULT) |
1094 | 1044 | jar_path = f"{service_jar_path}:{chronon_gcp_jar_path}"
|
1095 | 1045 |
|
1096 | 1046 | Runner(ctx.params, os.path.expanduser(jar_path)).run()
|
|
0 commit comments