# Skip to content
#
# Check for changes in OL #133
#
# Check for changes in OL
#
# Check for changes in OL #133

name: Check for changes in OL
# Assumption: any relevant upstream change touches either the OpenLineage spec
# or the Spark integration.
on:
  # Scheduled run: 05:00 UTC every Sunday.
  schedule:
    - cron: '0 5 * * 0'
  # Manual run. Every input is optional; when provided, an input takes
  # precedence over the value the workflow would otherwise compute.
  workflow_dispatch:
    inputs:
      run_tests:
        description: "Run tests"
        required: false
        default: "true"
      openlineage_release:
        description: "Override OpenLineage release version"
        required: false
      spark_matrix:
        description: "Overwrite matrix for spark tests"
        required: false
      hive_matrix:
        description: "Overwrite matrix for hive tests"
        required: false
# GITHUB_TOKEN scopes granted to the jobs of this workflow.
permissions:
  id-token: write
  contents: write
  pull-requests: write
  issues: write
jobs:
  # Detects spec changes and computes the values shared by the later jobs:
  # whether to run tests, the OpenLineage release, the producer matrices and
  # an execution timestamp.
  initialize_workflow:
    runs-on: ubuntu-latest
    outputs:
      # For each output, a manually supplied workflow_dispatch input wins over
      # the value computed by the steps below.
      changes_in_spec: ${{ github.event.inputs.run_tests || steps.check-changes.outputs.changes_found }}
      ol_release: ${{ github.event.inputs.openlineage_release || steps.get-release.outputs.openlineage_release }}
      spark_matrix: ${{ github.event.inputs.spark_matrix || steps.set-matrix-values.outputs.spark_dataproc_matrix }}
      hive_matrix: ${{ github.event.inputs.hive_matrix || steps.set-matrix-values.outputs.hive_dataproc_matrix }}
      execution_time: ${{ steps.get-execution-time.outputs.execution_time }}
    steps:
      - name: Get execution time
        id: get-execution-time
        # NOTE(review): the format is year-DAY-MONTH (%Y%d%m), which does not
        # sort chronologically. Left unchanged because the consumers of
        # execution_time are not visible here - confirm whether %Y%m%d was meant.
        run: echo "execution_time=$(date +'%Y%d%m%H%M')" >> "$GITHUB_OUTPUT"

      - name: Checkout
        uses: actions/checkout@v4

      - name: create dir for ol spec and report
        id: dir_for_spec
        shell: bash
        run: |
          mkdir -p openlineage

      # Second checkout: the upstream OpenLineage repository, placed in
      # ./openlineage so its spec directory can be compared.
      - name: get latest OL spec
        uses: actions/checkout@v4
        with:
          repository: OpenLineage/OpenLineage
          path: openlineage

      # Runs the comparison script; changes_found becomes 'true' only when the
      # script has produced ./updated-spec-versions.json.
      - name: check for changes in spec
        id: check-changes
        run: |
          # Default first; a later write of the same key overrides it.
          echo "changes_found=false" >> "$GITHUB_OUTPUT"
          python scripts/compare_spec_versions.py \
            --spec_base=./openlineage/spec \
            --versions_path=./generated-files/spec_versions.json \
            --new_versions_path=./updated-spec-versions.json
          if [ -f updated-spec-versions.json ]; then
            echo "changes_found=true" >> "$GITHUB_OUTPUT"
          fi

      - uses: actions/upload-artifact@v4
        if: steps.check-changes.outputs.changes_found == 'true'
        with:
          name: spec-versions
          # Must be the file written by the previous step (workspace root,
          # not generated-files/), otherwise nothing is uploaded.
          path: updated-spec-versions.json
          # This step only runs when the file exists, so a miss is an error.
          if-no-files-found: error
          retention-days: 1

      # Also runs when tests are forced manually: in that case changes_in_spec
      # is 'true' even without spec changes, and ol_release must not be empty.
      - name: get openlineage release
        if: steps.check-changes.outputs.changes_found == 'true' || github.event.inputs.run_tests == 'true'
        id: get-release
        run: |
          openlineage_release=$(jq -c '.[] | select(.name | contains("openlineage")) | .latest_version ' -r generated-files/releases.json)
          echo "openlineage_release=${openlineage_release}" >> "$GITHUB_OUTPUT"

      - name: set-matrix-values
        id: set-matrix-values
        run: |
          set -e

          # Prints, as compact JSON, producer/<component>/versions.json with
          # the latest OpenLineage and component versions from
          # generated-files/releases.json appended when not already listed.
          # Returns non-zero if an input file is missing or jq fails.
          get_matrix() {
            local component="$1"
            local releases_file="./generated-files/releases.json"
            local versions_file="producer/$component/versions.json"
            local latest_ol latest_comp

            [[ -f "$releases_file" && -f "$versions_file" ]] || {
              echo "Missing releases.json or versions.json" >&2
              return 1
            }

            # Declared separately from the assignment so that a jq failure is
            # not masked by the exit status of `local`.
            latest_ol=$(jq -r '.[] | select(.name == "openlineage") | .latest_version' "$releases_file") || return 1
            latest_comp=$(jq -r --arg name "$component" '.[] | select(.name == $name) | .latest_version' "$releases_file") || return 1

            jq -c --arg ol "$latest_ol" --arg comp "$latest_comp" '
              .openlineage_versions |= (if $ol != "" and index($ol) == null then . + [$ol] else . end) |
              .component_version |= (if $comp != "" and index($comp) == null then . + [$comp] else . end)
            ' "$versions_file"
          }

          # Plain assignments (rather than $(...) inside echo) so that a
          # failing get_matrix aborts the step instead of emitting an empty
          # matrix.
          spark_matrix=$(get_matrix spark_dataproc)
          hive_matrix=$(get_matrix hive_dataproc)
          echo "spark_dataproc_matrix=${spark_matrix}" >> "$GITHUB_OUTPUT"
          echo "hive_dataproc_matrix=${hive_matrix}" >> "$GITHUB_OUTPUT"

  ######## COMPONENT VALIDATION ########
  # check if scenarios are still compatible with the new spec
  scenarios_check:
    needs:
      - initialize_workflow
    if: ${{ success() && needs.initialize_workflow.outputs.changes_in_spec == 'true' }}
    uses: ./.github/workflows/check_scenarios.yml
    with:
      release: ${{ needs.initialize_workflow.outputs.ol_release }}
      get-latest-snapshots: 'true'
      fail-for-failures: false

  # check if producers still produce compatible events
  spark-dataproc:
    needs:
      - initialize_workflow
    if: ${{ success() && needs.initialize_workflow.outputs.changes_in_spec == 'true' }}
    uses: ./.github/workflows/producer_spark_dataproc.yml
    strategy:
      # Matrix JSON comes from initialize_workflow; the `with` block below
      # reads its openlineage_versions and component_version dimensions.
      matrix: ${{ fromJson(needs.initialize_workflow.outputs.spark_matrix) }}
    secrets:
      gcpKey: ${{ secrets.GCP_SA_KEY }}
      postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
      postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
    with:
      ol_release: ${{ matrix.openlineage_versions }}
      spark_release: ${{ matrix.component_version }}
      get-latest-snapshots: 'true'

  hive-dataproc:
    needs:
      - initialize_workflow
    if: ${{ success() && needs.initialize_workflow.outputs.changes_in_spec == 'true' }}
    uses: ./.github/workflows/producer_hive_dataproc.yml
    strategy:
      matrix: ${{ fromJson(needs.initialize_workflow.outputs.hive_matrix) }}
    secrets:
      gcpKey: ${{ secrets.GCP_SA_KEY }}
    with:
      ol_release: ${{ matrix.openlineage_versions }}
      component_release: ${{ matrix.component_version }}
      get-latest-snapshots: 'true'

  ######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########
  collect-and-compare-reports:
    needs:
      - initialize_workflow
      - scenarios_check
      - spark-dataproc
      - hive-dataproc
    uses: ./.github/workflows/collect_and_compare_reports.yml
    with:
      fail-for-new-failures: true

  # Runs only when a job it depends on has failed.
  notify-maintainers:
    needs:
      - initialize_workflow
      - collect-and-compare-reports
    if: ${{ failure() }}
    uses: ./.github/workflows/notify_maintainers.yml
    with:
      workflow-type: 'spec_change'
      # NOTE(review): 'manual' is reported only when the openlineage_release
      # input is set; a manual dispatch without it is reported as 'scheduled'.
      # Confirm whether github.event_name should be used instead.
      trigger-type: ${{ github.event.inputs.openlineage_release && 'manual' || 'scheduled' }}
      execution-time: ${{ needs.initialize_workflow.outputs.execution_time }}

  # Runs unless a job it depends on has failed.
  update-repo-files:
    needs:
      - initialize_workflow
      - notify-maintainers
    if: ${{ !failure() }}
    uses: ./.github/workflows/update_repo_files.yml
    secrets:
      CI_PUSH_TOKEN: ${{ secrets.CI_PUSH_TOKEN }}
    with:
      workflow-type: 'spec_change'
      trigger-type: ${{ github.event.inputs.openlineage_release && 'manual' || 'scheduled' }}
      execution-time: ${{ needs.initialize_workflow.outputs.execution_time }}