# Spark dataproc/change vm versions #308
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Pull-request entry point: decides which component checks to run (see the
# jobs below) and delegates each of them to a reusable workflow.
name: Pull Request trigger

# Triggered by pull_request events; no branch/path/type filters are set here.
on:
  pull_request:

# Workflow-wide GITHUB_TOKEN scopes, inherited by every job below.
# NOTE(review): presumably id-token is for cloud auth and the write scopes let
# the reusable workflows push reports / comment on the PR - confirm and narrow
# if any of them turn out to be unused.
permissions:
  id-token: write
  contents: write
  pull-requests: write
  issues: write
jobs:
  # Gate job. Runs the repository sanity scripts, works out which component
  # directories the PR touches, and publishes the OpenLineage release plus the
  # per-producer version matrices that the downstream jobs consume.
  initialize_workflow:
    runs-on: ubuntu-latest
    outputs:
      # "true" when the PR touches the matching directory, otherwise empty.
      run_dataplex: ${{ steps.get-changed.outputs.dataplex_changed }}
      run_scenarios: ${{ steps.get-changed.outputs.scenarios_changed }}
      run_spark_dataproc: ${{ steps.get-changed.outputs.spark_dataproc_changed }}
      run_hive_dataproc: ${{ steps.get-changed.outputs.hive_dataproc_changed }}
      ol_release: ${{ steps.get-release.outputs.openlineage_release }}
      # "true" when at least one of the four directories above changed.
      any_run: ${{ steps.get-changed.outputs.any_changed }}
      # Compact JSON objects, fed to `strategy.matrix` through fromJson().
      spark_matrix: ${{ steps.set-matrix-values.outputs.spark_dataproc_matrix }}
      hive_matrix: ${{ steps.set-matrix-values.outputs.hive_dataproc_matrix }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: check file structure
        id: check-structure
        run: ./scripts/check_structure.sh

      - name: check configs
        id: check-configs
        run: ./scripts/check_configs.sh

      - name: get changed files
        id: get-changed
        run: |
          # check_path <path-pattern> <output-name>
          # If any changed file matches the pattern, records <output-name>=true
          # as a step output and prints "true" so the caller can capture it.
          check_path() {
            local path=$1
            local output=$2
            if echo "$CHANGED_FILES" | grep -q "$path"; then
              echo "$output=true" >> "$GITHUB_OUTPUT"
              echo "true"
            fi
          }

          CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
          if [[ -n "$CHANGED_FILES" ]]; then
            # Full list of changed paths as a JSON array (trailing empty entry dropped).
            echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> "$GITHUB_OUTPUT"

            scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
            dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
            spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
            hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")

            # Each variable is either "true" or empty, so this is a plain
            # "any of them non-empty" test.
            if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc ]]; then
              echo "any_changed=true" >> "$GITHUB_OUTPUT"
            fi
          fi
        env:
          # Token used by the `gh` CLI call above.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: get openlineage release
        id: get-release
        run: |
          echo " any changed value is ${{ steps.get-changed.outputs.any_changed }}"
          # latest_version of every releases.json entry whose name contains "openlineage".
          openlineage_release=$(cat generated-files/releases.json | jq -c '.[] | select(.name | contains("openlineage")) | .latest_version ' -r)
          echo "openlineage_release=${openlineage_release}" >> "$GITHUB_OUTPUT"

      - name: set-matrix-values
        id: set-matrix-values
        run: |
          # get_matrix <component>
          # Prints producer/<component>/versions.json as compact JSON, after
          # appending the latest OpenLineage version to .openlineage_versions
          # and the component's latest version to .component_version when
          # releases.json has a value that is not already listed.
          # Returns 1 (with a message on stderr) if either input file is missing;
          # the callers below do not check that status, so the output is then empty.
          get_matrix() {
            local component="$1"
            local releases_file="./generated-files/releases.json"
            local versions_file="producer/$component/versions.json"

            [[ -f "$releases_file" && -f "$versions_file" ]] || {
              echo "Missing releases.json or versions.json" >&2
              return 1
            }

            local latest_ol=$(jq -r '.[] | select(.name == "openlineage") | .latest_version' "$releases_file")
            local latest_comp=$(jq -r --arg name "$component" '.[] | select(.name == $name) | .latest_version' "$releases_file")

            jq -c --arg ol "$latest_ol" --arg comp "$latest_comp" '
              .openlineage_versions |= (if $ol != "" and index($ol) == null then . + [$ol] else . end) |
              .component_version |= (if $comp != "" and index($comp) == null then . + [$comp] else . end)
            ' "$versions_file"
          }

          echo "spark_dataproc_matrix=$(get_matrix spark_dataproc)" >> "$GITHUB_OUTPUT"
          echo "hive_dataproc_matrix=$(get_matrix hive_dataproc)" >> "$GITHUB_OUTPUT"

  ######## COMPONENT VALIDATION ########

  # Runs only when the PR touches consumer/scenarios/.
  scenarios:
    needs: initialize_workflow
    if: ${{ needs.initialize_workflow.outputs.run_scenarios == 'true' }}
    uses: ./.github/workflows/check_scenarios.yml
    with:
      get-latest-snapshots: false
      release: ${{ needs.initialize_workflow.outputs.ol_release }}

  # Waits for `scenarios`; `!failure()` lets it run when `scenarios` was
  # skipped, but not when it failed.
  dataplex:
    needs:
      - initialize_workflow
      - scenarios
    if: ${{ !failure() && needs.initialize_workflow.outputs.run_dataplex == 'true' }}
    uses: ./.github/workflows/consumer_dataplex.yml
    secrets:
      gcpKey: ${{ secrets.GCP_SA_KEY }}
    with:
      release: ${{ needs.initialize_workflow.outputs.ol_release }}

  # One run per combination in the matrix computed by set-matrix-values.
  spark_dataproc:
    needs: initialize_workflow
    if: ${{ needs.initialize_workflow.outputs.run_spark_dataproc == 'true' }}
    uses: ./.github/workflows/producer_spark_dataproc.yml
    strategy:
      matrix: ${{ fromJson(needs.initialize_workflow.outputs.spark_matrix) }}
    secrets:
      gcpKey: ${{ secrets.GCP_SA_KEY }}
      postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
      postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
    with:
      ol_release: ${{ matrix.openlineage_versions }}
      spark_release: ${{ matrix.component_version }}
      get-latest-snapshots: 'false'

  # One run per combination in the matrix computed by set-matrix-values.
  hive_dataproc:
    needs: initialize_workflow
    if: ${{ needs.initialize_workflow.outputs.run_hive_dataproc == 'true' }}
    uses: ./.github/workflows/producer_hive_dataproc.yml
    strategy:
      matrix: ${{ fromJson(needs.initialize_workflow.outputs.hive_matrix) }}
    secrets:
      gcpKey: ${{ secrets.GCP_SA_KEY }}
    with:
      ol_release: ${{ matrix.openlineage_versions }}
      component_release: ${{ matrix.component_version }}
      get-latest-snapshots: 'false'

  ######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########

  # Must wait for every component job, including spark_dataproc: without it in
  # `needs`, collection could start while the Spark matrix is still running and
  # `!failure()` would not take a Spark failure into account.
  collect-and-compare-reports:
    needs:
      - initialize_workflow
      - scenarios
      - dataplex
      - spark_dataproc
      - hive_dataproc
    if: ${{ !failure() && needs.initialize_workflow.outputs.any_run == 'true' }}
    uses: ./.github/workflows/collect_and_compare_reports.yml
    with:
      fail-for-new-failures: true

  # No explicit `if`, so the default success() condition applies: this runs
  # only when collect-and-compare-reports succeeded.
  generate-compatibility-tables:
    needs:
      - collect-and-compare-reports
    uses: ./.github/workflows/generate_compatibility_tables.yml