Skip to content

Spark dataproc/change vm versions #308

Spark dataproc/change vm versions

Spark dataproc/change vm versions #308

Workflow file for this run

# Pull-request workflow: triggered for pull_request events with the default
# activity types (no filters are configured).
name: Pull Request trigger
on: pull_request
# Scopes granted to the GITHUB_TOKEN for every job in this workflow.
permissions:
  id-token: write
  contents: write
  pull-requests: write
  issues: write
jobs:
initialize_workflow:
  # Entry job: checks out the PR, runs the repository structure/config check
  # scripts, detects which component directories the PR touches, and publishes
  # the change flags, the OpenLineage release and the version matrices that
  # downstream jobs read through `needs.initialize_workflow.outputs`.
  runs-on: ubuntu-latest
  outputs:
    run_dataplex: ${{ steps.get-changed.outputs.dataplex_changed }}
    run_scenarios: ${{ steps.get-changed.outputs.scenarios_changed }}
    run_spark_dataproc: ${{ steps.get-changed.outputs.spark_dataproc_changed }}
    run_hive_dataproc: ${{ steps.get-changed.outputs.hive_dataproc_changed }}
    ol_release: ${{ steps.get-release.outputs.openlineage_release }}
    any_run: ${{ steps.get-changed.outputs.any_changed }}
    spark_matrix: ${{ steps.set-matrix-values.outputs.spark_dataproc_matrix }}
    hive_matrix: ${{ steps.set-matrix-values.outputs.hive_dataproc_matrix }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: check file structure
      id: check-structure
      run: ./scripts/check_structure.sh

    - name: check configs
      id: check-configs
      run: ./scripts/check_configs.sh

    - name: get changed files
      id: get-changed
      run: |
        # check_path PATH OUTPUT_NAME
        # When any changed file name contains PATH, writes "OUTPUT_NAME=true"
        # to the step outputs and prints "true" so the caller can capture it.
        # Prints nothing otherwise.
        check_path() {
          local path=$1
          local output=$2
          # -F: PATH is a literal directory prefix, not a regular expression.
          if echo "$CHANGED_FILES" | grep -qF -- "$path"; then
            echo "$output=true" >> "$GITHUB_OUTPUT"
            echo "true"
          fi
        }
        CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
        if [[ -n "$CHANGED_FILES" ]]; then
          # JSON array of changed paths; [:-1] drops the empty element that
          # the trailing newline produces after split.
          echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> "$GITHUB_OUTPUT"
          scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
          dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
          spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
          hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")
          # Each variable is either "true" or empty, so this is true when at
          # least one component directory was touched.
          if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc ]]; then
            echo "any_changed=true" >> "$GITHUB_OUTPUT"
          fi
        fi
      env:
        # Token used by the gh CLI call above.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: get openlineage release
      id: get-release
      run: |
        echo " any changed value is ${{ steps.get-changed.outputs.any_changed }}"
        # Exact name match (the same selector the set-matrix-values step uses)
        # so at most one entry is selected and the output stays on one line.
        openlineage_release=$(jq -r '.[] | select(.name == "openlineage") | .latest_version' generated-files/releases.json)
        echo "openlineage_release=${openlineage_release}" >> "$GITHUB_OUTPUT"

    - name: set-matrix-values
      id: set-matrix-values
      run: |
        # get_matrix COMPONENT
        # Prints producer/COMPONENT/versions.json as compact JSON, with the
        # latest OpenLineage and component versions from releases.json appended
        # to .openlineage_versions / .component_version when not already
        # listed. Returns 1 (printing nothing to stdout) if either file is
        # missing.
        get_matrix() {
          local component="$1"
          local releases_file="./generated-files/releases.json"
          local versions_file="producer/$component/versions.json"
          [[ -f "$releases_file" && -f "$versions_file" ]] || {
            echo "Missing releases.json or versions.json" >&2
            return 1
          }
          # "// empty" keeps a missing latest_version from becoming the
          # literal string "null", which would otherwise be added to the matrix.
          local latest_ol=$(jq -r '.[] | select(.name == "openlineage") | .latest_version // empty' "$releases_file")
          local latest_comp=$(jq -r --arg name "$component" '.[] | select(.name == $name) | .latest_version // empty' "$releases_file")
          jq -c --arg ol "$latest_ol" --arg comp "$latest_comp" '
            .openlineage_versions |= (if $ol != "" and index($ol) == null then . + [$ol] else . end) |
            .component_version |= (if $comp != "" and index($comp) == null then . + [$comp] else . end)
          ' "$versions_file"
        }
        echo "spark_dataproc_matrix=$(get_matrix spark_dataproc)" >> "$GITHUB_OUTPUT"
        echo "hive_dataproc_matrix=$(get_matrix hive_dataproc)" >> "$GITHUB_OUTPUT"
######## COMPONENT VALIDATION ########
scenarios:
  # Calls the reusable check_scenarios workflow; runs only when
  # initialize_workflow reported run_scenarios == 'true'.
  needs: [initialize_workflow]
  if: needs.initialize_workflow.outputs.run_scenarios == 'true'
  uses: ./.github/workflows/check_scenarios.yml
  with:
    release: ${{ needs.initialize_workflow.outputs.ol_release }}
    get-latest-snapshots: false
dataplex:
  # Calls the reusable consumer_dataplex workflow after `scenarios`.
  needs: [initialize_workflow, scenarios]
  # `!failure()` replaces the implicit success() check, so this job can still
  # run when `scenarios` was skipped, but not when a needed job failed.
  if: ${{ !failure() && needs.initialize_workflow.outputs.run_dataplex == 'true' }}
  uses: ./.github/workflows/consumer_dataplex.yml
  with:
    release: ${{ needs.initialize_workflow.outputs.ol_release }}
  secrets:
    gcpKey: ${{ secrets.GCP_SA_KEY }}
spark_dataproc:
  # Calls the reusable producer_spark_dataproc workflow once per combination in
  # the matrix published by initialize_workflow (outputs.spark_matrix); runs
  # only when run_spark_dataproc == 'true'.
  needs: [initialize_workflow]
  if: needs.initialize_workflow.outputs.run_spark_dataproc == 'true'
  strategy:
    matrix: ${{ fromJson(needs.initialize_workflow.outputs.spark_matrix) }}
  uses: ./.github/workflows/producer_spark_dataproc.yml
  with:
    ol_release: ${{ matrix.openlineage_versions }}
    spark_release: ${{ matrix.component_version }}
    get-latest-snapshots: 'false'
  secrets:
    gcpKey: ${{ secrets.GCP_SA_KEY }}
    postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
    postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
hive_dataproc:
  # Calls the reusable producer_hive_dataproc workflow once per combination in
  # the matrix published by initialize_workflow (outputs.hive_matrix); runs
  # only when run_hive_dataproc == 'true'.
  needs: [initialize_workflow]
  if: needs.initialize_workflow.outputs.run_hive_dataproc == 'true'
  strategy:
    matrix: ${{ fromJson(needs.initialize_workflow.outputs.hive_matrix) }}
  uses: ./.github/workflows/producer_hive_dataproc.yml
  with:
    ol_release: ${{ matrix.openlineage_versions }}
    component_release: ${{ matrix.component_version }}
    get-latest-snapshots: 'false'
  secrets:
    gcpKey: ${{ secrets.GCP_SA_KEY }}
######## COLLECT REPORTS AND EXECUTE APPROPRIATE ACTIONS ########
collect-and-compare-reports:
  # Calls the reusable collect_and_compare_reports workflow once every
  # validation job has finished. spark_dataproc is listed alongside
  # hive_dataproc so collection waits for the Spark run as well instead of
  # possibly starting while it is still in progress.
  needs:
    - initialize_workflow
    - scenarios
    - dataplex
    - spark_dataproc
    - hive_dataproc
  # `!failure()` replaces the implicit success() check, so skipped validation
  # jobs do not block collection; a failed needed job does. Requires at least
  # one component to have changed (any_run).
  if: ${{ !failure() && needs.initialize_workflow.outputs.any_run == 'true' }}
  uses: ./.github/workflows/collect_and_compare_reports.yml
  with:
    fail-for-new-failures: true
generate-compatibility-tables:
  # Calls the reusable generate_compatibility_tables workflow. There is no
  # `if`, so the implicit success() check applies to collect-and-compare-reports.
  needs: collect-and-compare-reports
  uses: ./.github/workflows/generate_compatibility_tables.yml