diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..dd84ea7824 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000..bbcbbe7d61 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/image/Dockerfile b/.github/image/Dockerfile new file mode 100644 index 0000000000..ddacbcde4c --- /dev/null +++ b/.github/image/Dockerfile @@ -0,0 +1,76 @@ +FROM ubuntu:latest + +# build using command: docker build --progress=plain -t chronon-base . + +# Install necessary tools and Python +RUN apt update && apt install -y wget curl bash python3 python3-pip openjdk-17-jdk python3.12-venv + +# java +ENV JAVA_HOME=/usr/lib/jvm/default-jvm +ENV PATH=$PATH:$JAVA_HOME/bin + +# sbt for scala +RUN curl -L "https://github.com/sbt/sbt/releases/download/v1.8.2/sbt-1.8.2.tgz" | tar -xz -C /usr/local +ENV PATH="/usr/local/sbt/bin:${PATH}" + +# bazel +RUN curl -fsSL "https://github.com/bazelbuild/bazelisk/releases/download/v1.18.0/bazelisk-linux-amd64" -o /usr/local/bin/bazel +RUN chmod +x /usr/local/bin/bazel +ENV PATH="/usr/local/bin:${PATH}" + +# thrift +ARG THRIFT_VERSION=0.21.0 +RUN apt install -y \ + build-essential \ + cmake \ + libboost-dev \ + libssl-dev \ + libevent-dev \ + bison \ + flex \ + autoconf \ + automake \ + libtool \ + curl && \ + curl -LSs https://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz -o thrift-${THRIFT_VERSION}.tar.gz && \ + tar -xzf thrift-${THRIFT_VERSION}.tar.gz && \ + cd thrift-${THRIFT_VERSION} && \ + ./configure --without-python --without-cpp --without-nodejs --without-java && \ + make && \ + make install && \ + cd .. 
&& \ + rm -rf thrift-${THRIFT_VERSION} thrift-${THRIFT_VERSION}.tar.gz && \ + apt purge -y \ + build-essential \ + cmake \ + libboost-dev \ + libssl-dev \ + libevent-dev \ + bison \ + flex \ + autoconf \ + automake \ + libtool \ + curl && \ + apt autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +# Upgrade pip and install some common Python packages +RUN pip3 install --break-system-packages pytest tox flake8 ruff + +RUN apt update && apt install -y build-essential git +RUN mkdir -p /usr/lib/jvm && ln -s /usr/lib/jvm/java-17-openjdk-amd64/ /usr/lib/jvm/default-jvm + +# Verify installations +RUN java -version && \ + thrift -version && \ + python3 --version && \ + pip3 --version && \ + bazel --version && \ + git --version + +# Set working directory +WORKDIR /app + +# Cmd to run when starting the container +CMD ["/bin/bash"] diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..ba58c4b648 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,8 @@ +## Summary + +## Checklist +- [ ] Added Unit Tests +- [ ] Covered by existing CI +- [ ] Integration tested +- [ ] Documentation update + diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000000..31a30d3b8a --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,17 @@ +changelog: + exclude: + labels: + - ignore-for-release + categories: + - title: Major features / breaking changes + labels: + - Semver-Major + - title: Minor features + labels: + - Semver-Minor + - title: Bug fixes + labels: + - Semver-Patch + - title: Other changes + labels: + - "*" \ No newline at end of file diff --git a/.github/workflows/build_and_push_docker.yaml b/.github/workflows/build_and_push_docker.yaml new file mode 100644 index 0000000000..d6de80b95c --- /dev/null +++ b/.github/workflows/build_and_push_docker.yaml @@ -0,0 +1,35 @@ +name: Build and Push Docker Image + +on: + push: + paths: + - '.github/image/Dockerfile' + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}-ci + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v1 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: .github/image + push: true + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest \ No newline at end of file diff --git a/.github/workflows/require_triggered_status_checks.yaml b/.github/workflows/require_triggered_status_checks.yaml new file mode 100644 index 0000000000..d9a714ce32 --- /dev/null +++ b/.github/workflows/require_triggered_status_checks.yaml @@ -0,0 +1,14 @@ +name: branch_protection +on: + push: +jobs: + enforce_triggered_workflows: + runs-on: ubuntu-latest + permissions: + checks: read + steps: + - name: GitHub Checks + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + delay: "10s" diff --git a/.github/workflows/test_bazel_config.yaml b/.github/workflows/test_bazel_config.yaml new file mode 100644 index 0000000000..2a158b5062 --- /dev/null +++ b/.github/workflows/test_bazel_config.yaml @@ -0,0 +1,53 @@ +name: Test Bazel Config + +on: + push: + branches: + - main + paths: + - 'tools/**' + - '.github/workflows/test_bazel_config.yaml' + - '.bazelrc' + - '.bazeliskrc' + - 'WORKSPACE' + 
pull_request: + branches: + - main + paths: + - 'tools/**' + - '.github/workflows/test_bazel_config.yaml' + - '.bazelrc' + - '.bazeliskrc' + - 'WORKSPACE' + + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + bazel_config_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run bazel sync + run: | + bazel build \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --nobuild \ + //... diff --git a/.github/workflows/test_python.yaml b/.github/workflows/test_python.yaml new file mode 100644 index 0000000000..6c0467e0f3 --- /dev/null +++ b/.github/workflows/test_python.yaml @@ -0,0 +1,71 @@ +name: Test Python + +on: + push: + branches: + - main + paths: + - 'api/python/**' + - '.github/workflows/test_python.yaml' + pull_request: + branches: + - main + paths: + - 'api/python/**' + - '.github/workflows/test_python.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + python_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Configure Git + run: | + git config --global user.email "github-actions@github.com" + git config --global user.name "GitHub Actions" + + - name: Set up Python virtual environment + shell: bash + run: | + python3 -m venv chronon_py_env + source chronon_py_env/bin/activate + + - name: Run Chronon Python lint (ruff) + shell: bash + run: | + source chronon_py_env/bin/activate + cd api/python + pip install ruff + ruff check . 
+ + - name: Run Chronon Python tests + shell: bash + run: | + set -euxo pipefail + source chronon_py_env/bin/activate + for file in api/thrift/*.thrift; do + thrift --gen py -out api/python/ "$file" + done + cd api/python + pip3 install -r requirements/dev.txt + pip3 install tox + tox + + - uses: actions/upload-artifact@v4 + with: + name: htmlcov + path: api/python/htmlcov \ No newline at end of file diff --git a/.github/workflows/test_scala_2_12_non_spark.yaml b/.github/workflows/test_scala_2_12_non_spark.yaml new file mode 100644 index 0000000000..07ed57bc84 --- /dev/null +++ b/.github/workflows/test_scala_2_12_non_spark.yaml @@ -0,0 +1,260 @@ +name: Test non-spark modules on scala 2.12 + +on: + push: + branches: + - main + paths: + - 'flink/**' + - 'aggregator/**' + - 'online/**' + - 'api/**' + - 'service/**' + - 'service_commons/**' + - 'cloud_aws/**' + - 'cloud_gcp/**' + - '.github/workflows/test_scala_2_12_non_spark.yaml' + pull_request: + branches: + - main + paths: + - 'flink/**' + - 'aggregator/**' + - 'online/**' + - 'api/**' + - 'service/**' + - 'service_commons/**' + - 'cloud_aws/**' + - 'cloud_gcp/**' + - '.github/workflows/test_scala_2_12_non_spark.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + flink_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Flink tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //flink:tests + + aggregator_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Aggregator tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //aggregator:tests + + online_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Online tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //online:tests + + api_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + 
working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run api tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //api:tests + + service_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run service tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //service:tests + + service_commons_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run service_commons tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //service_commons:tests + + cloud_gcp_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run cloud gcp tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //cloud_gcp:tests + + cloud_aws_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run cloud aws tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --java_language_version=17 \ + --java_runtime_version=17 \ + //cloud_aws:tests diff --git a/.github/workflows/test_scala_2_12_spark.yaml b/.github/workflows/test_scala_2_12_spark.yaml new file mode 100644 index 0000000000..39b200e320 --- /dev/null +++ b/.github/workflows/test_scala_2_12_spark.yaml @@ -0,0 +1,201 @@ +name: Test Spark 
module on scala 2.12 + +on: + push: + branches: + - main + paths: + - 'spark/**' + - '.github/workflows/test_scala_2_12_spark.yaml' + pull_request: + branches: + - main + paths: + - 'spark/**' + - '.github/workflows/test_scala_2_12_spark.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + spark_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Spark tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \ + //spark:tests + batch_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Batch tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \ + //spark:batch_test + + fetcher_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Fetcher tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:fetcher_test + + join_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Join tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:join_test + + groupby_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run GroupBy tests + run: | + bazel test \ + 
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:groupby_test + + analyzer_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Analyzer tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:analyzer_test + + streaming_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Streaming tests + run: | + bazel test \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:streaming_test \ No newline at end of file diff --git a/.github/workflows/test_scala_2_13_non_spark.yaml b/.github/workflows/test_scala_2_13_non_spark.yaml new file mode 100644 index 0000000000..752de0a874 --- /dev/null +++ b/.github/workflows/test_scala_2_13_non_spark.yaml @@ -0,0 +1,237 @@ +name: Test non-spark modules on scala 2.13 + +on: + push: + branches: + - main + paths: + - 'flink/**' + - 'aggregator/**' + - 'online/**' + - 'api/**' + - 'service/**' + - 'cloud_aws/**' + - 'cloud_gcp/**' + - '.github/workflows/test_scala_2_13_non_spark.yaml' + pull_request: + branches: + - main + paths: + - 'flink/**' + - 'aggregator/**' + - 'online/**' + - 'api/**' + - 'service/**' + - 'cloud_aws/**' + - 'cloud_gcp/**' + - '.github/workflows/test_scala_2_13_non_spark.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + flink_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Flink tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //flink:tests + + aggregator_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: 
Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Aggregator tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //aggregator:tests + + online_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run Online tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //online:tests + + api_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run api tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //api:tests + + service_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run service tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //service:tests + + cloud_gcp_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run cloud gcp tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + //cloud_gcp:tests + + cloud_aws_tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Setup 
Sim-Links + id: sim-links + run: ./startup.sh + + - name: Run cloud aws tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --java_language_version=17 \ + --java_runtime_version=17 \ + //cloud_aws:tests diff --git a/.github/workflows/test_scala_2_13_spark.yaml b/.github/workflows/test_scala_2_13_spark.yaml new file mode 100644 index 0000000000..3090eab231 --- /dev/null +++ b/.github/workflows/test_scala_2_13_spark.yaml @@ -0,0 +1,209 @@ +name: Test Spark module on scala 2.13 + +on: + push: + branches: + - main + paths: + - 'spark/**' + - '.github/workflows/test_scala_2_13_spark.yaml' + pull_request: + branches: + - main + paths: + - 'spark/**' + - '.github/workflows/test_scala_2_13_spark.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + spark_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Spark tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \ + //spark:tests + + batch_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Batch tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx8G -Xms2G" \ + //spark:batch_test + + fetcher_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Fetcher tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:fetcher_test + + join_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Join tests + run: | + bazel test \ + --config=scala_2.13 \ + 
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:join_test + + groupby_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run GroupBy tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:groupby_test + + analyzer_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Analyzer tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:analyzer_test + + streaming_tests: + runs-on: ubuntu-8_cores-32_gb + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Run Streaming tests + run: | + bazel test \ + --config=scala_2.13 \ + --remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + --test_env=JAVA_OPTS="-Xmx16G -Xms8G" \ + //spark:streaming_test \ No newline at end of file diff --git a/.github/workflows/test_scala_fmt.yaml b/.github/workflows/test_scala_fmt.yaml new file mode 100644 index 0000000000..d6ee84cd23 --- /dev/null +++ b/.github/workflows/test_scala_fmt.yaml @@ -0,0 +1,50 @@ +name: Scala Fmt + +on: + push: + branches: + - main + paths: + - '**/*.scala' + - '.github/workflows/test_scala_fmt.yaml' + pull_request: + branches: + - main + paths: + - '**/*.scala' + - '.github/workflows/test_scala_fmt.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + scala_compile_fmt_fix: + runs-on: ubuntu-latest + container: + image: ghcr.io/${{ github.repository }}-ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + defaults: + run: + working-directory: ${{ github.workspace }} + + steps: + - uses: actions/checkout@v4 + + - name: Set up locale + run: | + export LANG=en_US.UTF-8 + export LC_ALL=en_US.UTF-8 + + - name: Setup Bazel cache credentials + run: | + echo "${{ secrets.BAZEL_CACHE_CREDENTIALS }}" | base64 -d > bazel-cache-key.json + + - name: Check Scalafmt + run: | + bazel query 'kind("scala_library.*", //...)' | xargs -I {} bazel run \ + 
--remote_cache=https://storage.googleapis.com/zipline-bazel-cache \ + --google_credentials=bazel-cache-key.json \ + {}.format-test \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel index a05fee3160..e69de29bb2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,36 +0,0 @@ -load("@rules_license//rules:license.bzl", "license") -load("@rules_license//rules:license_kind.bzl", "license_kind") - -license_kind( - name = "zipline_ai", -) - -license( - name = "license", - package_name = "zipline_packages", - license_kinds = [":zipline_ai"], - license_text = "Refer to Zipline Agreement Document", -) - -package( - default_package_metadata = [":license"], - default_visibility = ["//visibility:public"], -) - -exports_files(["maven_install.json"]) - -# Don't try to build this target, instead build individual module uber jars with appropriate main class -# This is currently only used to generate single SBOM file with all our dependencies across modules -jvm_binary( - name = "zipline_packages", - deploy_env = [ - "//tools/build_rules/cloud_gcp:cloud_gcp", - "//tools/build_rules/flink:flink", - ], - main_class = "None", - runtime_deps = [ - "//cloud_gcp:base_cloud_gcp_lib", - "//flink:lib", - "//service:lib", - ], -) diff --git a/api/python/ai/chronon/cli/compile/conf_validator.py b/api/python/ai/chronon/cli/compile/conf_validator.py index 09e12c5fb4..a3c20f8bf6 100644 --- a/api/python/ai/chronon/cli/compile/conf_validator.py +++ b/api/python/ai/chronon/cli/compile/conf_validator.py @@ -20,6 +20,7 @@ import re from collections import defaultdict from typing import Dict, List, Set +import textwrap import ai.chronon.api.common.ttypes as common from ai.chronon.api.ttypes import ( @@ -29,7 +30,7 @@ ExternalPart, GroupBy, Join, - Source, + Source, JoinPart, ) from ai.chronon.group_by import get_output_col_names from ai.chronon.logger import get_logger @@ -377,6 +378,80 @@ def _validate_derivations( derived_columns.add(derivation.name) return errors + def _validate_join_part_keys(self, join_part: JoinPart, left_cols: List[str]) -> BaseException: + keys = [] + + key_mapping = join_part.keyMapping if join_part.keyMapping else {} + for key in join_part.groupBy.keyColumns: + keys.append(key_mapping.get(key, key)) + + missing = [k for k in keys if k not in left_cols] + + err_string = "" + left_cols_as_str = ", ".join(left_cols) + if missing: + err_string += textwrap.dedent(f""" + Join is missing keys {missing} on left side. Required for JoinPart: {join_part.groupBy.metaData.name}. 
+ Existing columns on left side: {left_cols_as_str} + All required Keys: {join_part.groupBy.keyColumns} + Key Mapping: {key_mapping if key_mapping else 'None'} + Consider renaming a column on the left, or including the key_mapping argument to your join_part.""") + + if key_mapping: + # Left side of key mapping should include columns on the left + key_map_keys_missing_from_left = [k for k in key_mapping.keys() if k not in left_cols] + if key_map_keys_missing_from_left: + err_string += f"\nThe following keys in your key_mapping: {str(key_map_keys_missing_from_left)} are not included in the left side of the join: {left_cols_as_str}" + + # Right side of key mapping should only include keys in GroupBy + keys_missing_from_key_map_values = [v for v in key_mapping.values() if v not in join_part.groupBy.keyColumns] + if keys_missing_from_key_map_values: + err_string += f"\nThe following values in your key_mapping: {str(keys_missing_from_key_map_values)} do not cover any group by key columns: {join_part.groupBy.keyColumns}" + + if key_map_keys_missing_from_left or keys_missing_from_key_map_values: + err_string += f"\nKey Mapping should be formatted as column_from_left -> group_by_key." + + if err_string: + return ValueError(err_string) + + + def _validate_keys(self, join: Join) -> List[BaseException]: + left = join.left + + left_selects = None + if left.events: + left_selects = left.events.query.selects + elif left.entities: + left_selects = left.entities.query.selects + elif left.joinSource: + left_selects = left.joinSource.query.selects + # TODO -- if selects are not selected here, get output cols from join + + left_cols = [] + + if left_selects: + left_cols = left_selects.keys() + + errors = [] + + if left_cols: + join_parts = join.joinParts + + # Add label_parts to join_parts to validate if set + label_parts = join.labelParts + if label_parts: + for label_jp in label_parts.labels: + join_parts.append(label_jp) + + # Validate join_parts + for join_part in join_parts: + join_part_err = self._validate_join_part_keys(join_part, left_cols) + if join_part_err: + errors.append(join_part_err) + + return errors + + def _validate_join(self, join: Join) -> List[BaseException]: """ Validate join's status with materialized versions of group_bys @@ -437,6 +512,9 @@ def _validate_join(self, join: Join) -> List[BaseException]: keys = get_pre_derived_source_keys(join.left) columns = features + keys errors.extend(self._validate_derivations(columns, join.derivations)) + + errors.extend(self._validate_keys(join)) + return errors def _validate_group_by(self, group_by: GroupBy) -> List[BaseException]: diff --git a/api/thrift/agent.thrift b/api/thrift/agent.thrift index fb510c7017..da81c72a7a 100644 --- a/api/thrift/agent.thrift +++ b/api/thrift/agent.thrift @@ -5,8 +5,8 @@ include "common.thrift" struct YarnAutoScalingSpec { 1: optional i32 minInstances 2: optional i32 maxInstances - 3: optional i32 scaleUpFactor // 1.5x, 2x etc - 4: optional i32 scaleDownFactor + 3: optional double scaleUpFactor // 1.5x, 2x etc + 4: optional double scaleDownFactor 5: optional string cooldownPeriod } diff --git a/cloud_aws/src/main/scala/ai/chronon/integrations/aws/EmrSubmitter.scala b/cloud_aws/src/main/scala/ai/chronon/integrations/aws/EmrSubmitter.scala index 3051a4f7a2..b757ff04a9 100644 --- a/cloud_aws/src/main/scala/ai/chronon/integrations/aws/EmrSubmitter.scala +++ b/cloud_aws/src/main/scala/ai/chronon/integrations/aws/EmrSubmitter.scala @@ -56,10 +56,17 @@ class EmrSubmitter(customerId: String, emrClient: EmrClient) extends 
JobSubmitte clusterIdleTimeout: Int = DefaultClusterIdleTimeout, masterInstanceType: String = DefaultClusterInstanceType, slaveInstanceType: String = DefaultClusterInstanceType, - instanceCount: Int = DefaultClusterInstanceCount) = { - val runJobFlowRequestBuilder = RunJobFlowRequest - .builder() - .name(s"job-${java.util.UUID.randomUUID.toString}") + instanceCount: Int = DefaultClusterInstanceCount, + clusterName: Option[String] = None) = { + val runJobFlowRequestBuilder = if (clusterName.isDefined) { + RunJobFlowRequest + .builder() + .name(clusterName.get) + } else { + RunJobFlowRequest + .builder() + .name(s"job-${java.util.UUID.randomUUID.toString}") + } // Cluster infra configurations: val customerSecurityGroupId = CustomerToSecurityGroupIdMap.getOrElse( @@ -170,7 +177,9 @@ class EmrSubmitter(customerId: String, emrClient: EmrClient) extends JobSubmitte submissionProperties.getOrElse(ClusterIdleTimeout, DefaultClusterIdleTimeout.toString).toInt, masterInstanceType = submissionProperties.getOrElse(ClusterInstanceType, DefaultClusterInstanceType), slaveInstanceType = submissionProperties.getOrElse(ClusterInstanceType, DefaultClusterInstanceType), - instanceCount = submissionProperties.getOrElse(ClusterInstanceCount, DefaultClusterInstanceCount.toString).toInt + instanceCount = + submissionProperties.getOrElse(ClusterInstanceCount, DefaultClusterInstanceCount.toString).toInt, + clusterName = submissionProperties.get(ClusterName) ) runJobFlowBuilder.steps( @@ -200,10 +209,11 @@ class EmrSubmitter(customerId: String, emrClient: EmrClient) extends JobSubmitte } } - override def status(jobId: String): Unit = { + override def status(jobId: String): String = { val describeStepResponse = emrClient.describeStep(DescribeStepRequest.builder().stepId(jobId).build()) val status = describeStepResponse.step().status() println(status) + status.toString } override def kill(stepId: String): Unit = { diff --git a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala index 1d967c6c23..b11f9628a0 100644 --- a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala +++ b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala @@ -30,17 +30,17 @@ case class GeneralJob( class DataprocSubmitter(jobControllerClient: JobControllerClient, conf: SubmitterConf) extends JobSubmitter { - override def status(jobId: String): Unit = { + override def status(jobId: String): String = { try { val currentJob: Job = jobControllerClient.getJob(conf.projectId, conf.region, jobId) - currentJob.getStatus.getState + currentJob.getStatus.getState.toString } catch { case e: ApiException => println(s"Error monitoring job: ${e.getMessage}") - + "UNKNOWN" // If there's an error, we return UNKNOWN status } } diff --git a/docs/source/dev/devnotes.md b/docs/source/dev/devnotes.md index 7db7318623..d57ed134a7 100644 --- a/docs/source/dev/devnotes.md +++ b/docs/source/dev/devnotes.md @@ -147,18 +147,6 @@ bazel build //hub:hub_assembly_deploy.jar > transitive dependencies, otherwise `bazel build //{module}:{target}` will only include > dependencies specified in the target definition -### Generate SBOM file for security review - -We created `zipline_packages` target in root directory BUILD.bazel file including all runtime deps -used across our deployed uber jars so we include all the necessary packages - -> Note: Do not use `zipline_packages` target for other purposes - 
-```shell -# This will generate SBOM file at bazel-bin/tools/compliance/zipline_sbom.json -bazel build //tools/compliance:zipline_sbom -``` - ### All tests for a specific module Also it's lot easier to just run from IntelliJ diff --git a/scripts/codemod/BUILD.bazel b/scripts/codemod/BUILD.bazel new file mode 100644 index 0000000000..0f607ce2ee --- /dev/null +++ b/scripts/codemod/BUILD.bazel @@ -0,0 +1,6 @@ +py_binary( + name = "thrift_package_replace", + srcs = ["thrift_package_replace.py"], + main = "thrift_package_replace.py", + visibility = ["//visibility:public"], +) \ No newline at end of file diff --git a/scripts/codemod/test_replace.py b/scripts/codemod/test_replace.py new file mode 100644 index 0000000000..0a1a17eda2 --- /dev/null +++ b/scripts/codemod/test_replace.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 + + +import glob + +""" +we have tests written in multiple flavors + +- extending junit TestCase class +- using @test annotation +- using AnyFunSuite +- using AnyFlatSpec +- using vertx junit runner + +bazel silently fails to run the tests when they are not uniform! + +This script translates almost all of the tests to AnyFlatSpec except for vertx tests. + +NOTE: CWD needs to be the root of the repo. + +USAGE: python3 scripts/codemod/test_replace.py +""" + + +def get_test_class_name(path): + # Get the file name from the path + filename = path.split("/")[-1] + # Remove 'Test.scala' and return + return filename.replace("Test.scala", "") + + +def convert_fun_suite_to_flatspec(lines, test_name): + modified_lines = [] + + for line in lines: + # Replace import statement + if "import org.scalatest.funsuite.AnyFunSuite" in line: + line = line.replace("funsuite.AnyFunSuite", "flatspec.AnyFlatSpec") + modified_lines.append(line) + continue + + # Replace AnyFunSuite with AnyFlatSpec + if "extends AnyFunSuite" in line: + line = line.replace("AnyFunSuite", "AnyFlatSpec") + modified_lines.append(line) + continue + + # Handle ignore tests and regular tests + if ("ignore(" in line or "test(" in line) and "{" in line: + start = line.find('"') + end = line.find('"', start + 1) + if start != -1 and end != -1: + test_desc = line[start + 1 : end] # Get description without quotes + words = test_desc.split() + + # Check if second word is "should" + if len(words) > 1 and words[1].lower() == "should": + subject = words[0] # Use first word as subject + remaining_desc = " ".join( + words[2:] + ) # Rest of description including "should" + new_desc = f'"{subject}" should "{remaining_desc}"' + else: + new_desc = f' it should "{test_desc}"' + + # Add appropriate suffix based on whether it's ignore or test + if "ignore(" in line: + new_line = f"{new_desc} ignore {{" + else: + new_line = f"{new_desc} in {{" + + modified_lines.append(new_line + "\n") + continue + + # Keep other lines unchanged + modified_lines.append(line) + + return "".join(modified_lines) + + +def split_camel_case(word): + if not word: + return [] + + result = [] + current_word = word[0].lower() + + for i in range(1, len(word)): + current_char = word[i] + prev_char = word[i - 1] + + # Split on transition from lowercase to uppercase + if current_char.isupper() and prev_char.islower(): + result.append(current_word) + current_word = current_char.lower() + # Split on transition from uppercase to lowercase, but only if it's not + # part of an acronym (i.e., if the previous char was also uppercase and + # not at the start of a word) + elif ( + current_char.islower() + and prev_char.isupper() + and i > 1 + and word[i - 2].isupper() + ): + 
result.append(current_word[:-1]) + current_word = prev_char.lower() + current_char + else: + current_word += current_char.lower() + + result.append(current_word) + return [token for token in result if token != "test"] + + +def convert_junit_to_flatspec(lines, test_name): + modified_lines = [] + is_test_method = False + class_modified = False + + for line in lines: + # Replace JUnit import with FlatSpec import + if "import org.junit.Test" in line: + modified_lines.append("import org.scalatest.flatspec.AnyFlatSpec\n") + continue + + # Handle class definition + if "class" in line and "Test" in line and (not class_modified): + class_modified = True + class_name = line.split("class")[1].split("{")[0].strip() + modified_lines.append(f"class {class_name} extends AnyFlatSpec {{\n") + continue + + # Mark start of a test method + if "@Test" in line: + is_test_method = True + continue + + # Convert only test methods marked with @Test and not private + if ( + is_test_method + and "def " in line + and "private" not in line + and (("(): Unit" in line) or ("): Unit" not in line)) + ): + is_test_method = False + + method_name = line.split("def ")[1].split("(")[0] + + test_description = " ".join(split_camel_case(method_name)) + + modified_lines.append(f' it should "{test_description}" in {{\n') + continue + + is_test_method = False + modified_lines.append(line) + + return "".join(modified_lines) + + +def convert_testcase_to_flatspec(lines, test_name): + modified_lines = [] + + for line in lines: + # Replace TestCase import with FlatSpec import + if "junit.framework.TestCase" in line: + modified_lines.append("import org.scalatest.flatspec.AnyFlatSpec\n") + continue + + # Handle imports that we want to keep + if line.startswith("import") and "TestCase" not in line: + modified_lines.append(line) + continue + + # Handle class definition + if "class" in line and "extends TestCase" in line: + class_name = line.split("class")[1].split("extends")[0].strip() + modified_lines.append(f"class {class_name} extends AnyFlatSpec {{\n") + continue + + # Convert test methods (they start with "def test") + if ( + "def test" in line + and "private" not in line + and ("(): Unit" in line or "): Unit" not in line) + ): + method_name = line.split("def test")[1].split("(")[0].strip() + # If there are parameters, capture them + + test_description = " ".join(split_camel_case(method_name)) + + modified_lines.append(f' it should "{test_description}" in {{\n') + continue + + modified_lines.append(line) + + return "".join(modified_lines) + + +def convert(handler, file_path): + test_name = get_test_class_name(file_path) + with open(file_path, "r") as file: + lines = file.readlines() + converted = handler(lines, test_name) + + with open(file_path, "w") as file: + file.write(converted) + + print(f"Converted {file_path}") + + +# Few challenging test cases below + +# convert( +# convert_junit_to_flatspec, +# "spark/src/test/scala/ai/chronon/spark/test/JoinUtilsTest.scala", +# ) + +# convert( +# convert_junit_to_flatspec, +# "spark/src/test/scala/ai/chronon/spark/test/LocalExportTableAbilityTest.scala", +# ) + +# convert( +# convert_testcase_to_flatspec, +# "aggregator/src/test/scala/ai/chronon/aggregator/test/FrequentItemsTest.scala", +# ) + +# convert( +# convert_fun_suite_to_flatspec, +# "spark/src/test/scala/ai/chronon/spark/test/FetcherTest.scala", +# ) + + +if __name__ == "__main__": + test_files = glob.glob("**/*Test.scala", recursive=True) + + fun_suite_files = [] + junit_files = [] + others = [] + junit_test_case_files = [] + 
flat_spec_files = [] + + for file_path in test_files: + try: + with open(file_path, "r") as file: + content = file.read() + if "AnyFunSuite" in content: + fun_suite_files.append(file_path) + elif "import org.junit.Test" in content: + junit_files.append(file_path) + elif "extends TestCase" in content: + junit_test_case_files.append(file_path) + elif "extends AnyFlatSpec" in content: + flat_spec_files.append(file_path) + else: + others.append(file_path) + except Exception as e: + print(f"Error reading {file_path}: {e}") + + print(f"funsuite files:\n {"\n ".join(fun_suite_files)}") + + for file in fun_suite_files: + convert(convert_fun_suite_to_flatspec, file) + + print(f"junit files:\n {"\n ".join(junit_files)}") + + for file in junit_files: + convert(convert_junit_to_flatspec, file) + + print(f"test case files:\n {"\n ".join(junit_test_case_files)}") + + for file in junit_test_case_files: + convert(convert_testcase_to_flatspec, file) + + print(f"flat spec files:\n {"\n ".join(flat_spec_files)}") + print(f"Other files:\n {"\n ".join(others)}") diff --git a/scripts/codemod/thrift_package_replace.py b/scripts/codemod/thrift_package_replace.py new file mode 100644 index 0000000000..b080831bfe --- /dev/null +++ b/scripts/codemod/thrift_package_replace.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +import argparse +import os +import sys +from pathlib import Path + + +def replace_in_file(input_file: Path, output_file: Path) -> None: + """Replace package names in a single file.""" + os.makedirs(output_file.parent, exist_ok=True) + + with open(input_file, 'r') as f: + content = f.read() + + modified_content = content.replace('org.apache.thrift', 'ai.chronon.api.thrift') + + with open(output_file, 'w') as f: + f.write(modified_content) + + +def process_directory(input_dir: Path, output_dir: Path, verbose: bool = False) -> None: + """Process all Java files in the input directory and its subdirectories.""" + if verbose: + print(f"Scanning directory: {input_dir}") + print(f"Output directory: {output_dir}") + + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + # Find all Java files + for java_file in input_dir.rglob('*.java'): + if verbose: + print(f"Processing file: {java_file}") + + # Calculate relative path to maintain directory structure + rel_path = java_file.relative_to(input_dir) + output_file = output_dir / rel_path + + if verbose: + print(f"Writing to: {output_file}") + + replace_in_file(java_file, output_file) + + +def main(): + parser = argparse.ArgumentParser(description='Replace package names in Java files') + parser.add_argument('input_dir', type=str, help='Input directory containing Java files') + parser.add_argument('output_dir', type=str, help='Output directory for modified files') + parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') + + args = parser.parse_args() + + input_path = Path(args.input_dir) + output_path = Path(args.output_dir) + + if not input_path.exists(): + print(f"Error: Input directory '{input_path}' does not exist", file=sys.stderr) + sys.exit(1) + + if not input_path.is_dir(): + print(f"Error: '{input_path}' is not a directory", file=sys.stderr) + sys.exit(1) + + try: + process_directory(input_path, output_path, args.verbose) + if args.verbose: + print("Replacement complete!") + except Exception as e: + print(f"Error: {str(e)}", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git 
a/spark/src/main/scala/ai/chronon/spark/submission/JobSubmitter.scala b/spark/src/main/scala/ai/chronon/spark/submission/JobSubmitter.scala index de8e6927a2..48953e5be8 100644 --- a/spark/src/main/scala/ai/chronon/spark/submission/JobSubmitter.scala +++ b/spark/src/main/scala/ai/chronon/spark/submission/JobSubmitter.scala @@ -21,7 +21,7 @@ trait JobSubmitter { files: List[String], args: String*): String - def status(jobId: String): Unit + def status(jobId: String): String def kill(jobId: String): Unit } @@ -109,6 +109,7 @@ object JobSubmitterConstants { val EmrReleaseLabel = "emrReleaseLabel" val ShouldCreateCluster = "shouldCreateCluster" val ClusterId = "jobFlowId" + val ClusterName = "clusterName" val JarUriArgKeyword = "--jar-uri" val JobTypeArgKeyword = "--job-type" diff --git a/tools/compliance/BUILD b/tools/compliance/BUILD deleted file mode 100644 index ce4b287bc9..0000000000 --- a/tools/compliance/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load(":gather_packages.bzl", "packages_used") -load(":sbom.bzl", "sbom") - -packages_used( - name = "zipline_packages", - out = "zipline_packages.json", - target = "//:zipline_packages_deploy.jar", -) - -sbom( - name = "zipline_sbom", - out = "zipline_sbom.json", - target = "//:zipline_packages_deploy.jar", -) - -# Add the write_sbom_private target similar to the reference -py_binary( - name = "write_sbom_private", - srcs = ["write_sbom.py"], - main = "write_sbom.py", -) diff --git a/tools/compliance/gather_packages.bzl b/tools/compliance/gather_packages.bzl deleted file mode 100644 index ff26347224..0000000000 --- a/tools/compliance/gather_packages.bzl +++ /dev/null @@ -1,258 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""Rules and macros for collecting LicenseInfo providers.""" - -load("@rules_license//rules:filtered_rule_kinds.bzl", "aspect_filters") -load( - "@rules_license//rules:providers.bzl", - "LicenseInfo", - "PackageInfo", -) -load("@rules_license//rules_gathering:trace.bzl", "TraceInfo") -load(":to_json.bzl", "labels_to_json", "licenses_to_json", "package_infos_to_json") -load(":user_filtered_rule_kinds.bzl", "user_aspect_filters") - -TransitivePackageInfo = provider( - """Transitive list of all SBOM relevant dependencies.""", - fields = { - "top_level_target": "Label: The top level target label we are examining.", - "license_info": "depset(LicenseInfo)", - "package_info": "depset(PackageInfo)", - "packages": "depset(label)", - "target_under_license": "Label: A target which will be associated with some licenses.", - "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.", - }, -) - -# Singleton instance of nothing present. Each level of the aspect must return something, -# so we use one to save space instead of making a lot of empty ones. -NULL_INFO = TransitivePackageInfo(license_info = depset(), package_info = depset(), packages = depset()) - -def should_traverse(ctx, attr): - """Checks if the dependent attribute should be traversed. - - Args: - ctx: The aspect evaluation context. - attr: The name of the attribute to be checked. - - Returns: - True iff the attribute should be traversed. 
- """ - k = ctx.rule.kind - for filters in [aspect_filters, user_aspect_filters]: - always_ignored = filters.get("*", []) - if k in filters: - attr_matches = filters[k] - if (attr in attr_matches or - "*" in attr_matches or - ("_*" in attr_matches and attr.startswith("_")) or - attr in always_ignored): - return False - - for m in attr_matches: - if attr == m: - return False - return True - -def _get_transitive_metadata(ctx, trans_license_info, trans_package_info, trans_packages, traces, provider, filter_func): - """Pulls the transitive data up from attributes we care about. - - Collapses all the TransitivePackageInfo providers from my deps into lists - that we can then turn into a single TPI. - """ - attrs = [a for a in dir(ctx.rule.attr)] - for name in attrs: - if not filter_func(ctx, name): - continue - a = getattr(ctx.rule.attr, name) - - # Make anything singleton into a list for convenience. - if type(a) != type([]): - a = [a] - for dep in a: - # Ignore anything that isn't a target - if type(dep) != "Target": - continue - - # Targets can also include things like input files that won't have the - # aspect, so we additionally check for the aspect rather than assume - # it's on all targets. Even some regular targets may be synthetic and - # not have the aspect. This provides protection against those outlier - # cases. - if provider in dep: - info = dep[provider] - if info.license_info: - trans_license_info.append(info.license_info) - if info.package_info: - trans_package_info.append(info.package_info) - if info.packages: - trans_packages.append(info.packages) - - if hasattr(info, "traces"): - if info.traces: - for trace in info.traces: - traces.append("(" + ", ".join([str(ctx.label), ctx.rule.kind, name]) + ") -> " + trace) - -def gather_package_common(target, ctx, provider_factory, metadata_providers, filter_func): - """Collect license and other metadata info from myself and my deps. - - Any single target might directly depend on a license, or depend on - something that transitively depends on a license, or neither. - This aspect bundles all those into a single provider. At each level, we add - in new direct license deps found and forward up the transitive information - collected so far. - - This is a common abstraction for crawling the dependency graph. It is - parameterized to allow specifying the provider that is populated with - results. It is configurable to select only a subset of providers. It - is also configurable to specify which dependency edges should not - be traced for the purpose of tracing the graph. - - Args: - target: The target of the aspect. - ctx: The aspect evaluation context. - provider_factory: abstracts the provider returned by this aspect - metadata_providers: a list of other providers of interest - filter_func: a function that returns true iff the dep edge should be ignored - - Returns: - provider of parameterized type - """ - - # A hack until https://github.com/bazelbuild/rules_license/issues/89 is - # fully resolved. If exec is in the bin_dir path, then the current - # configuration is probably cfg = exec. - if "-exec" in ctx.bin_dir.path: - return [NULL_INFO] - - # First we gather my direct license attachments - licenses = [] - package_info = [] - if ctx.rule.kind == "_license": - # Don't try to gather licenses from the license rule itself. We'll just - # blunder into the text file of the license and pick up the default - # attribute of the package, which we don't want. 
- pass - elif hasattr(ctx.rule.attr, "applicable_licenses"): - for dep in ctx.rule.attr.applicable_licenses: - if LicenseInfo in dep: - licenses.append(dep[LicenseInfo]) - if PackageInfo in dep: - package_info.depend(dep[LicenseInfo]) - elif hasattr(ctx.rule.attr, "package_metadata"): - for dep in ctx.rule.attr.package_metadata: - if LicenseInfo in dep: - licenses.append(dep[LicenseInfo]) - if PackageInfo in dep: - package_info.depend(dep[LicenseInfo]) - - # Record all the external repos anyway. - target_name = str(target.label) - packages = [] - if target_name.startswith("@") and target_name[1] != "/": - packages.append(target.label) - # DBG print(str(target.label)) - - elif hasattr(ctx.rule.attr, "tags"): - for tag in ctx.rule.attr.tags: - if tag.startswith("maven_coordinates="): - packages.append(target.label) - - # Now gather transitive collection of providers from the targets - # this target depends upon. - trans_license_info = [] - trans_package_info = [] - trans_packages = [] - traces = [] - _get_transitive_metadata(ctx, trans_license_info, trans_package_info, trans_packages, traces, provider_factory, filter_func) - - if (not licenses and - not package_info and - not packages and - not trans_license_info and - not trans_package_info and - not trans_packages): - return [NULL_INFO] - - # If this is the target, start the sequence of traces. - if ctx.attr._trace[TraceInfo].trace and ctx.attr._trace[TraceInfo].trace in str(ctx.label): - traces = [ctx.attr._trace[TraceInfo].trace] - - # Trim the number of traces accumulated since the output can be quite large. - # A few representative traces are generally sufficient to identify why a dependency - # is incorrectly incorporated. - if len(traces) > 10: - traces = traces[0:10] - - return [provider_factory( - target_under_license = target.label, - license_info = depset(direct = licenses, transitive = trans_license_info), - package_info = depset(direct = package_info, transitive = trans_package_info), - packages = depset(direct = packages, transitive = trans_packages), - traces = traces, - )] - -def _gather_package_impl(target, ctx): - ret = gather_package_common( - target, - ctx, - TransitivePackageInfo, - # [ExperimentalMetadataInfo, PackageInfo], - [PackageInfo], - should_traverse, - ) - - # print(ret) - return ret - -gather_package_info = aspect( - doc = """Collects License and Package providers into a single TransitivePackageInfo provider.""", - implementation = _gather_package_impl, - attr_aspects = ["*"], - attrs = { - "_trace": attr.label(default = "@rules_license//rules:trace_target"), - }, - provides = [TransitivePackageInfo], - apply_to_generating_rules = True, -) - -def _packages_used_impl(ctx): - """Write the TransitivePackageInfo as JSON.""" - tpi = ctx.attr.target[TransitivePackageInfo] - licenses_json = licenses_to_json(tpi.license_info) - package_info_json = package_infos_to_json(tpi.package_info) - packages = labels_to_json(tpi.packages.to_list()) - - # Create a single dict of all the info. 
- main_template = """{{ - "top_level_target": "{top_level_target}", - "licenses": {licenses}, - "package_info": {package_info}, - "packages": {packages} - \n}}""" - - content = main_template.format( - top_level_target = ctx.attr.target.label, - licenses = licenses_json, - package_info = package_info_json, - packages = packages, - ) - ctx.actions.write( - output = ctx.outputs.out, - content = content, - ) - -packages_used = rule( - doc = """Gather transitive package information for a target and write as JSON.""", - implementation = _packages_used_impl, - attrs = { - "target": attr.label( - aspects = [gather_package_info], - allow_files = True, - ), - "out": attr.output(mandatory = True), - }, -) diff --git a/tools/compliance/packages_used_test.py b/tools/compliance/packages_used_test.py deleted file mode 100644 index bafb855004..0000000000 --- a/tools/compliance/packages_used_test.py +++ /dev/null @@ -1,54 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""Smoke test for packages_used.""" - -import json -import os -import unittest - - -def read_data_file(basename): - path = os.path.join( - os.getenv("TEST_SRCDIR"), - os.getenv("TEST_WORKSPACE"), - "tools/compliance", - basename - ) - with open(path, "rt", encoding="utf-8") as f: - return f.read() - - -class PackagesUsedTest(unittest.TestCase): - - def test_found_key_licenses(self): - raw_json = read_data_file("bazel_packages.json") - content = json.loads(raw_json) - found_top_level_license = False - found_zlib = False - for l in content["licenses"]: - print(l["label"]) # for debugging the test - if l["label"] == "//:license": - found_top_level_license = True - if l["label"] == "//third_party/bazel:license": - found_top_level_license = True - if l["label"] == "//third_party/zlib:license": - found_zlib = True - self.assertTrue(found_top_level_license) - self.assertTrue(found_zlib) - - def test_found_remote_packages(self): - if os.getenv("TEST_WORKSPACE") != "bazel": - return - raw_json = read_data_file("bazel_packages.json") - content = json.loads(raw_json) - self.assertIn( - "@@remoteapis+//:build_bazel_remote_execution_v2_remote_execution_proto", - content["packages"], - ) - - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/tools/compliance/sbom.bzl b/tools/compliance/sbom.bzl deleted file mode 100644 index 128e927b1a..0000000000 --- a/tools/compliance/sbom.bzl +++ /dev/null @@ -1,79 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""Generate an SBOM for a target.""" - -load(":gather_packages.bzl", "packages_used") - -def _sbom_impl(ctx): - # Gather all licenses and write information to one place - - # Now turn the big blob of data into something consumable. 
- outputs = [ctx.outputs.out] - args = ctx.actions.args() - inputs = [ctx.file.packages_used] - args.add("--packages_used", ctx.file.packages_used.path) - args.add("--out", ctx.outputs.out.path) - if ctx.attr.maven_install: - args.add("--maven_install", ctx.file.maven_install.path) - inputs.append(ctx.file.maven_install) - ctx.actions.run( - mnemonic = "CreateSBOM", - progress_message = "Creating SBOM for %s" % ctx.label, - inputs = inputs, - outputs = outputs, - executable = ctx.executable._sbom_generator, - arguments = [args], - ) - return [DefaultInfo(files = depset(outputs))] - -_sbom = rule( - implementation = _sbom_impl, - attrs = { - "packages_used": attr.label( - allow_single_file = True, - mandatory = True, - ), - "out": attr.output(mandatory = True), - "_sbom_generator": attr.label( - default = Label("//tools/compliance:write_sbom_private"), - executable = True, - allow_files = True, - cfg = "exec", - ), - "maven_install": attr.label( - mandatory = False, - allow_single_file = True, - ), - }, -) - -def sbom( - name, - target, - out = None, - maven_install = "//:maven_install.json"): - """Wrapper for sbom rule. - - Args: - name: name - target: Target to create sbom for - out: output file name - maven_install: maven lock file - """ - packages = "_packages_" + name - packages_used( - name = packages, - target = target, - out = packages + ".json", - ) - if not out: - out = name + "_sbom.json" - _sbom( - name = name, - out = out, - packages_used = ":" + packages + ".json", - maven_install = maven_install, - ) diff --git a/tools/compliance/to_json.bzl b/tools/compliance/to_json.bzl deleted file mode 100644 index 2b69a73d1f..0000000000 --- a/tools/compliance/to_json.bzl +++ /dev/null @@ -1,136 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""Utility methods for turning package metadata to JSON. - -These should eventually be part of rules_license. -""" - -def _strip_null_repo(label): - """Removes the null repo name (e.g. @//) from a string. - - The is to make str(label) compatible between bazel 5.x and 6.x - """ - s = str(label) - if s.startswith("@//"): - return s[1:] - elif s.startswith("@@//"): - return s[2:] - return s - -def _bazel_package(label): - """Returns the package containing a label.""" - clean_label = _strip_null_repo(label) - return clean_label[0:-(len(label.name) + 1)] - -_license_template = """{{ - "label": "{label}", - "bazel_package": "{bazel_package}", - "license_kinds": [{kinds}], - "copyright_notice": "{copyright_notice}", - "package_name": "{package_name}", - "package_url": "{package_url}", - "package_version": "{package_version}", - "license_text": "{license_text}" -}}""" - -_kind_template = """{{ - "target": "{kind_path}", - "name": "{kind_name}", - "conditions": {kind_conditions} -}}""" - -def license_info_to_json(license): - """Converts a LicenseInfo to JSON. - - Args: - license: a LicenseInfo - Returns: - JSON representation of license. 
- """ - kinds = [] - for kind in sorted(license.license_kinds, key = lambda x: x.name): - kinds.append(_kind_template.format( - kind_name = kind.name, - kind_path = kind.label, - kind_conditions = kind.conditions, - )) - - return _license_template.format( - copyright_notice = license.copyright_notice, - kinds = ",".join(kinds), - license_text = license.license_text.path, - package_name = license.package_name, - package_url = license.package_url, - package_version = license.package_version, - label = _strip_null_repo(license.label), - bazel_package = _bazel_package(license.label), - ) - -def licenses_to_json(licenses): - """Converts a list of LicenseInfo to JSON. - - This list is sorted by label for stability. - - Args: - licenses: list(LicenseInfo) - Returns: - JSON representation of licenses - """ - all_licenses = [] - for license in sorted(licenses.to_list(), key = lambda x: x.label): - all_licenses.append(license_info_to_json(license)) - return "[" + ",".join(all_licenses) + "]" - -_package_info_template = """{{ - "target": "{label}", - "bazel_package": "{bazel_package}", - "package_name": "{package_name}", - "package_url": "{package_url}", - "package_version": "{package_version}" -}}""" - -def package_info_to_json(package_info): - """Converts a PackageInfo to json. - - Args: - package_info: a PackageInfo - Returns: - JSON representation of package_info. - """ - return _package_info_template.format( - label = _strip_null_repo(package_info.label), - bazel_package = _bazel_package(package_info.label), - package_name = package_info.package_name, - package_url = package_info.package_url, - package_version = package_info.package_version, - ) - -def package_infos_to_json(packages): - """Converts a list of PackageInfo to JSON. - - This list is sorted by label for stability. - - Args: - packages: list(PackageInfo) - Returns: - JSON representation of packages. - """ - all_packages = [] - for package in sorted(packages.to_list(), key = lambda x: x.label): - all_packages.append(package_info_to_json(package)) - return "[" + ",".join(all_packages) + "]" - -def labels_to_json(labels): - """Converts a list of Labels to JSON. - - This list is sorted for stability. - - Args: - labels: list(Label) - Returns: - JSON representation of the labels. - """ - return "[%s]" % ",".join(['"%s"' % _strip_null_repo(label) for label in sorted(labels)]) diff --git a/tools/compliance/user_filtered_rule_kinds.bzl b/tools/compliance/user_filtered_rule_kinds.bzl deleted file mode 100644 index 3a37a8353a..0000000000 --- a/tools/compliance/user_filtered_rule_kinds.bzl +++ /dev/null @@ -1,20 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""Filtered rule kinds for aspect inspection. - -The format of this dictionary is: - rule_name: [attr, attr, ...] - -Filters for rules that are not part of the Bazel distribution should be added -to this file. - -Attributes are either the explicit list of attributes to filter, or '_*' which -would ignore all attributes prefixed with a _. 
-""" - -# Rule kinds with attributes the aspect currently needs to ignore -user_aspect_filters = { -} diff --git a/tools/compliance/write_sbom.py b/tools/compliance/write_sbom.py deleted file mode 100644 index b621447e0f..0000000000 --- a/tools/compliance/write_sbom.py +++ /dev/null @@ -1,230 +0,0 @@ -# This file is copied from the bazel github repository https://github.com/bazelbuild/bazel/blob/master/tools/compliance -# as it was recently added in latest bazel version but we are currently using old 5.4.0 because of pending -# bazelmod migration for rules_scala package -# *** DO NOT MODIFY THIS FILE UNLESS WE REALLY NEED TO *** - -"""SBOM generator. - -This tool takes input from several sources and weaves together an SBOM. - -Inputs: - - the output of packages_used. This is a JSON block of license, package_info - and other declarations, plus a list of all remote packages referenced. - - the maven lock file (maven_install.json) - - FUTURE: other packgage lock files - - FUTURE: a user provided override of package URL to corrected information - -This tool is private to the sbom() rule. -""" - -import argparse -import datetime -import hashlib -import json - - -# pylint: disable=g-bare-generic -def create_sbom(package_info: dict, maven_packages: dict) -> dict: - """Creates a dict representing an SBOM. - - Args: - package_info: dict of data from packages_used output. - maven_packages: packages gleaned from Maven lock file. - - Returns: - dict of SBOM data - """ - now = datetime.datetime.now(datetime.timezone.utc) - ret = { - "spdxVersion": "SPDX-2.3", - "dataLicense": "CC0-1.0", - "SPDXID": "SPDXRef-DOCUMENT", - "documentNamespace": ( - "https://spdx.google/be852459-4c54-4c50-9d2f-0e48890418fc" - ), - "name": package_info["top_level_target"], - "creationInfo": { - "licenseListVersion": "", - "creators": [ - "Tool: github.com/bazelbuild/bazel/tools/compliance/write_sbom", - "Organization: Google LLC", - ], - "created": now.isoformat(), - }, - } - - packages = [] - relationships = [] - - relationships.append({ - "spdxElementId": "SPDXRef-DOCUMENT", - "relatedSpdxElement": "SPDXRef-Package-main", - "relationshipType": "DESCRIBES" - }) - - # This is bazel private shenanigans. - magic_file_suffix = "//file:file" - - for pkg in package_info["packages"]: - tmp_id = hashlib.md5() - tmp_id.update(pkg.encode("utf-8")) - spdxid = "SPDXRef-GooglePackage-%s" % tmp_id.hexdigest() - pi = { - "name": pkg, - "downloadLocation": "NOASSERTION", - "SPDXID": spdxid, - # TODO(aiuto): Fill in the rest - # "supplier": "Organization: Google LLC", - # "licenseConcluded": "License-XXXXXX", - # "copyrightText": "" - } - - have_maven = None - if pkg.startswith("@maven//:"): - have_maven = maven_packages.get(pkg[9:]) - elif pkg.endswith(magic_file_suffix): - # Bazel hacks jvm_external to add //file:file as a target, then we depend - # on that rather than the correct thing. - # Example: @org_apache_tomcat_tomcat_annotations_api_8_0_5//file:file - # Check for just the versioned root - have_maven = maven_packages.get(pkg[1 : -len(magic_file_suffix)]) - - if have_maven: - pi["downloadLocation"] = have_maven["url"] - else: - # TODO(aiuto): Do something better for this case. 
- print("MISSING ", pkg) - - packages.append(pi) - relationships.append({ - "spdxElementId": "SPDXRef-Package-main", - "relatedSpdxElement": spdxid, - "relationshipType": "CONTAINS", - }) - - ret["packages"] = packages - ret["relationships"] = relationships - return ret - - -def maven_to_bazel(s): - """Returns a string with maven separators mapped to what we use in Bazel. - - Essentially '.', '-', ':' => '_'. - - Args: - s: a string - - Returns: - a string - """ - return s.replace(".", "_").replace("-", "_").replace(":", "_") - - -# pylint: disable=g-bare-generic -def maven_install_to_packages(maven_install: dict) -> dict: - """Convert raw maven lock file into a dict keyed by bazel package names. - - Args: - maven_install: raw maven lock file data - - Returns: - dict keyed by names created by rules_jvm_external - """ - - # Map repo coordinate back to the download repository. - # The input dict is of the form - # "https//repo1.maven.org/": [ com.google.foo:some.package, ...] - # But.... sometimes the artifact is - # com.google.foo:some.package.jar.arch - # and then that means the artifact table has an entry - # in their shasums table keyed by arch. - - repo_to_url = {} - for url, repos in maven_install["repositories"].items(): - for repo in repos: - if repo in repo_to_url: - print( - "WARNING: Duplicate download path for <%s>. Using %s" - % (repo, repo_to_url[repo]) - ) - continue - repo_to_url[repo] = url - - ret = {} - for name, info in maven_install["artifacts"].items(): - # With something like this: - parts = name.split(":") - if len(parts) >= 2: - repo = parts[0] - artifact = ":".join(parts[1:]) # Join remaining parts - else: - # Handle case with no colon or just a single part - repo = name - artifact = "" - version = info["version"] - - for arch in info["shasums"].keys(): - # build the download URL - sub_version = version - repo_name = name - if arch != "jar": - sub_version = version + "-" + arch - repo_name = "%s:jar:%s" % (name, arch) - - url = ( - "{mirror}{repo}/{artifact}/{version}/{artifact}-{version}.jar".format( - mirror = repo_to_url.get(repo_name, "https://repo1.maven.org/maven2/"), - repo=repo.replace(".", "/"), - artifact=artifact, - version=version, - ) - ) - tmp = info.copy() - tmp["maven_name"] = name - tmp["url"] = url - bazel_name = maven_to_bazel(name) + "_" + maven_to_bazel(sub_version) - ret[bazel_name] = tmp - if arch == "jar": - ret[bazel_name] = tmp - return ret - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Helper for creating SBOMs", fromfile_prefix_chars="@" - ) - parser.add_argument( - "--out", required=True, help="The output file, mandatory." - ) - parser.add_argument( - "--packages_used", - required=True, - help="JSON list of transitive package data for a target", - ) - parser.add_argument( - "--maven_install", - required=False, - default="", - help="Maven lock file", - ) - opts = parser.parse_args() - - with open(opts.packages_used, "rt", encoding="utf-8") as inp: - package_info = json.loads(inp.read()) - - maven_packages = None - if opts.maven_install: - with open(opts.maven_install, "rt", encoding="utf-8") as inp: - maven_install = json.loads(inp.read()) - maven_packages = maven_install_to_packages(maven_install) - # Useful for debugging - # print(json.dumps(maven_packages, indent=2)) - - sbom = create_sbom(package_info, maven_packages) - with open(opts.out, "w", encoding="utf-8") as out: - out.write(json.dumps(sbom, indent=2)) - - -if __name__ == "__main__": - main() \ No newline at end of file
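For reference, the removed tools/compliance/write_sbom.py keyed Maven artifacts by the names rules_jvm_external generates. A minimal sketch of that key derivation, with an assumed example coordinate (illustrative only, not read from maven_install.json):

    # Sketch of the mapping used by the removed write_sbom.py:
    # '.', '-' and ':' in the Maven coordinate and version become '_'.
    def maven_to_bazel(s: str) -> str:
        return s.replace(".", "_").replace("-", "_").replace(":", "_")

    name, version = "org.apache.thrift:libthrift", "0.21.0"  # assumed example coordinate
    print(maven_to_bazel(name) + "_" + maven_to_bazel(version))
    # -> org_apache_thrift_libthrift_0_21_0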