Skip to content

Commit c7041f9

Browse files
committed
Improve caching strategy across the board in the CI workflow
We use various caches in our build. So far, because of the way "standard" caching works, PRs from forks could not effectively use the cache from the main Airflow repository: caches are not shared with other repositories, so PR builds could only use the cache effectively when they were rebased and continued running from the same fork. This PR improves the caching strategy by using the "stash" action from the ASF. Unlike `cache`, the stash action uses artifacts to store the cache, which makes it possible for PRs coming from forks to use the cache uploaded by `main` canary builds. As part of this change, all the places where setup-python was used and breeze was installed afterwards were reviewed and updated to use only the breeze installation action (it already installs Python), and this action has been improved to use uv caching effectively. Overall, this PR should decrease setup overhead for many jobs across the CI workflow. Follow-up after #45266.
1 parent 52ed7d7 commit c7041f9

File tree

8 files changed

+150
-69
lines changed

8 files changed

+150
-69
lines changed

.github/actions/breeze/action.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,11 @@ runs:
3333
uses: actions/setup-python@v5
3434
with:
3535
python-version: ${{ inputs.python-version }}
36-
cache: 'pip'
37-
cache-dependency-path: ./dev/breeze/pyproject.toml
36+
- name: "Restore uv cache for breeze"
37+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
38+
with:
39+
key: "breeze-uv-cache-${{ inputs.python-version }}-${{ hashFiles('dev/breeze/pyproject.toml') }}"
40+
path: ~/.cache/uv
3841
- name: "Install Breeze"
3942
shell: bash
4043
run: ./scripts/ci/install_breeze.sh

.github/actions/install-pre-commit/action.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,31 +19,31 @@
1919
name: 'Install pre-commit'
2020
description: 'Installs pre-commit and related packages'
2121
inputs:
22+
# TODO(potiuk): automate update of these versions
2223
python-version:
2324
description: 'Python version to use'
24-
default: 3.9
25+
default: "3.9"
2526
uv-version:
2627
description: 'uv version to use'
27-
default: 0.5.5
28+
default: "0.5.13"
2829
pre-commit-version:
2930
description: 'pre-commit version to use'
30-
default: 4.0.1
31+
default: "4.0.1"
3132
pre-commit-uv-version:
3233
description: 'pre-commit-uv version to use'
33-
default: 4.1.4
34+
default: "4.1.4"
3435
runs:
3536
using: "composite"
3637
steps:
38+
- name: "Restore pre-commit cache"
39+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
40+
with:
41+
key: "pre-commit-cache-${{ inputs.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}"
42+
path: ~/.cache/pre-commit/
3743
- name: Install pre-commit, uv, and pre-commit-uv
3844
shell: bash
3945
run: |
4046
pip install uv==${{inputs.uv-version}} || true
4147
uv tool install pre-commit==${{inputs.pre-commit-version}} --with uv==${{inputs.uv-version}} \
4248
--with pre-commit-uv==${{inputs.pre-commit-uv-version}}
43-
- name: Cache pre-commit envs
44-
uses: actions/cache@v4
45-
with:
46-
path: ~/.cache/pre-commit
47-
key: "pre-commit-${{inputs.python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}"
48-
restore-keys: |
49-
pre-commit-${{inputs.python-version}}-
49+
pre-commit install-hooks

.github/workflows/basic-tests.yml

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,11 @@ jobs:
7272
persist-credentials: false
7373
- name: "Cleanup docker"
7474
run: ./scripts/ci/cleanup_docker.sh
75-
- uses: actions/setup-python@v5
76-
with:
77-
python-version: "${{ inputs.default-python-version }}"
78-
cache: 'pip'
79-
cache-dependency-path: ./dev/breeze/pyproject.toml
80-
- run: pip install --editable ./dev/breeze/
75+
- name: "Install Breeze"
76+
uses: ./.github/actions/breeze
8177
- run: python -m pytest -n auto --color=yes
8278
working-directory: ./dev/breeze/
8379

84-
8580
tests-ui:
8681
timeout-minutes: 10
8782
name: React UI tests
@@ -108,15 +103,24 @@ jobs:
108103
node-version: 21
109104
cache: 'pnpm'
110105
cache-dependency-path: 'airflow/ui/pnpm-lock.yaml'
111-
- name: "Cache eslint"
112-
uses: actions/cache@v4
106+
- name: "Restore eslint cache (ui)"
107+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
113108
with:
114-
path: 'airflow/ui/node_modules'
109+
path: 'airflow/ui/node_modules/'
115110
key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }}
111+
id: restore-eslint-cache
116112
- run: cd airflow/ui && pnpm install --frozen-lockfile
117113
- run: cd airflow/ui && pnpm test
118114
env:
119115
FORCE_COLOR: 2
116+
- name: "Save eslint cache (ui)"
117+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
118+
with:
119+
path: 'airflow/ui/node_modules/'
120+
key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }}
121+
if-no-files-found: 'error'
122+
retention-days: '2'
123+
if: steps.restore-eslint-cache.outputs.cache-hit != 'true'
120124

121125
tests-www:
122126
timeout-minutes: 10
@@ -137,15 +141,53 @@ jobs:
137141
uses: actions/setup-node@v4
138142
with:
139143
node-version: 21
140-
- name: "Cache eslint"
141-
uses: actions/cache@v4
144+
- name: "Restore eslint cache (www)"
145+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
142146
with:
143-
path: 'airflow/www/node_modules'
147+
path: 'airflow/www/node_modules/'
144148
key: ${{ runner.os }}-www-node-modules-${{ hashFiles('airflow/www/**/yarn.lock') }}
149+
id: restore-eslint-cache
145150
- run: yarn --cwd airflow/www/ install --frozen-lockfile --non-interactive
146151
- run: yarn --cwd airflow/www/ run test
147152
env:
148153
FORCE_COLOR: 2
154+
- name: "Save eslint cache (www)"
155+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
156+
with:
157+
path: 'airflow/www/node_modules/'
158+
key: ${{ runner.os }}-www-node-modules-${{ hashFiles('airflow/www/**/yarn.lock') }}
159+
if-no-files-found: 'error'
160+
retention-days: '2'
161+
if: steps.restore-eslint-cache.outputs.cache-hit != 'true'
162+
163+
install-pre-commit:
164+
timeout-minutes: 5
165+
name: "Install pre-commit for cache"
166+
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
167+
env:
168+
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
169+
if: inputs.basic-checks-only == 'true'
170+
steps:
171+
- name: "Install Breeze"
172+
uses: ./.github/actions/breeze
173+
id: breeze
174+
- name: "Install pre-commit"
175+
uses: ./.github/actions/install-pre-commit
176+
id: pre-commit
177+
with:
178+
python-version: ${{steps.breeze.outputs.host-python-version}}
179+
# Saving pre-commit cache should happen only in one job in the entire workflow - because otherwise
180+
# it will cause 409 conflict errors - see https://github.com/actions/upload-artifact/issues/478
181+
# the way it works with airflow - even if the same action is in "ci-image-tests" the if condition
182+
# above `if: inputs.basic-checks-only == 'true'` will prevent it from running in the other job
183+
- name: "Save pre-commit cache"
184+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
185+
with:
186+
# yamllint disable rule:line-length
187+
key: "pre-commit-cache-${{ steps.breeze.outputs.host-python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}"
188+
path: ~/.cache/pre-commit/
189+
if-no-files-found: 'error'
190+
retention-days: '2'
149191

150192
# Those checks are run if no image needs to be built for checks. This is for simple changes that
151193
# Do not touch any of the python code or any of the important files that might require building
@@ -154,6 +196,7 @@ jobs:
154196
timeout-minutes: 30
155197
name: "Static checks: basic checks only"
156198
runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }}
199+
needs: install-pre-commit
157200
if: inputs.basic-checks-only == 'true'
158201
steps:
159202
- name: "Cleanup repo"
@@ -165,18 +208,6 @@ jobs:
165208
persist-credentials: false
166209
- name: "Cleanup docker"
167210
run: ./scripts/ci/cleanup_docker.sh
168-
- name: "Setup python"
169-
uses: actions/setup-python@v5
170-
with:
171-
python-version: ${{ inputs.default-python-version }}
172-
cache: 'pip'
173-
cache-dependency-path: ./dev/breeze/pyproject.toml
174-
- name: "Setup python"
175-
uses: actions/setup-python@v5
176-
with:
177-
python-version: "${{ inputs.default-python-version }}"
178-
cache: 'pip'
179-
cache-dependency-path: ./dev/breeze/pyproject.toml
180211
- name: "Install Breeze"
181212
uses: ./.github/actions/breeze
182213
id: breeze
@@ -216,6 +247,7 @@ jobs:
216247
timeout-minutes: 45
217248
name: "Upgrade checks"
218249
runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }}
250+
needs: install-pre-commit
219251
env:
220252
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
221253
if: inputs.canary-run == 'true' && inputs.latest-versions-only != 'true'
@@ -229,12 +261,14 @@ jobs:
229261
persist-credentials: false
230262
- name: "Cleanup docker"
231263
run: ./scripts/ci/cleanup_docker.sh
232-
# Install python from scratch. No cache used. We always want to have fresh version of everything
233-
- uses: actions/setup-python@v5
264+
- name: "Install Breeze"
265+
uses: ./.github/actions/breeze
266+
id: breeze
267+
- name: "Install pre-commit"
268+
uses: ./.github/actions/install-pre-commit
269+
id: pre-commit
234270
with:
235-
python-version: "${{ inputs.default-python-version }}"
236-
- name: "Install latest pre-commit"
237-
run: pip install pre-commit
271+
python-version: ${{steps.breeze.outputs.host-python-version}}
238272
- name: "Autoupdate all pre-commits"
239273
run: pre-commit autoupdate
240274
- name: "Run automated upgrade for black"

.github/workflows/ci-image-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,5 +162,5 @@ jobs:
162162
key: "ci-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}"
163163
path: "/tmp/ci-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar"
164164
if-no-files-found: 'error'
165-
retention-days: 2
165+
retention-days: '2'
166166
if: inputs.upload-image-artifact == 'true'

.github/workflows/ci-image-checks.yml

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,40 @@ on: # yamllint disable-line rule:truthy
106106
type: string
107107

108108
jobs:
109+
install-pre-commit:
110+
timeout-minutes: 5
111+
name: "Install pre-commit for cache"
112+
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
113+
env:
114+
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
115+
if: inputs.basic-checks-only == 'false'
116+
steps:
117+
- name: "Install Breeze"
118+
uses: ./.github/actions/breeze
119+
id: breeze
120+
- name: "Install pre-commit"
121+
uses: ./.github/actions/install-pre-commit
122+
id: pre-commit
123+
with:
124+
python-version: ${{steps.breeze.outputs.host-python-version}}
125+
# Saving pre-commit cache should happen only in one job in the entire workflow - because otherwise
126+
# it will cause 409 conflict errors - see https://github.com/actions/upload-artifact/issues/478
127+
# the way it works with airflow - even if the same action is in "basic-tests" the if condition
128+
# above `if: inputs.basic-checks-only == 'false'` will prevent it from running in the other job
129+
- name: "Save pre-commit cache"
130+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
131+
with:
132+
# yamllint disable rule:line-length
133+
key: "pre-commit-cache-${{ steps.breeze.outputs.host-python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}"
134+
path: ~/.cache/pre-commit/
135+
if-no-files-found: 'error'
136+
retention-days: '2'
137+
109138
static-checks:
110139
timeout-minutes: 45
111140
name: "Static checks"
112141
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
142+
needs: install-pre-commit
113143
env:
114144
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
115145
UPGRADE_TO_NEWER_DEPENDENCIES: "${{ inputs.upgrade-to-newer-dependencies }}"
@@ -123,12 +153,6 @@ jobs:
123153
uses: actions/checkout@v4
124154
with:
125155
persist-credentials: false
126-
- name: "Setup python"
127-
uses: actions/setup-python@v5
128-
with:
129-
python-version: ${{ inputs.default-python-version }}
130-
cache: 'pip'
131-
cache-dependency-path: ./dev/breeze/pyproject.toml
132156
- name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}"
133157
uses: ./.github/actions/prepare_breeze_and_image
134158
with:
@@ -154,6 +178,7 @@ jobs:
154178
timeout-minutes: 45
155179
name: "MyPy checks"
156180
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
181+
needs: install-pre-commit
157182
if: inputs.needs-mypy == 'true'
158183
strategy:
159184
fail-fast: false
@@ -221,24 +246,30 @@ jobs:
221246
with:
222247
platform: "linux/amd64"
223248
python: ${{ inputs.default-python-version }}
224-
- uses: actions/cache@v4
225-
id: cache-doc-inventories
249+
- name: "Restore docs inventory cache"
250+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
226251
with:
227252
path: ./docs/_inventory_cache/
228-
key: docs-inventory-${{ hashFiles('pyproject.toml;') }}
229-
restore-keys: |
230-
docs-inventory-${{ hashFiles('pyproject.toml;') }}
231-
docs-inventory-
253+
key: docs-inventory-${{ hashFiles('pyproject.toml') }}
254+
id: restore-eslint-cache
232255
- name: "Building docs with ${{ matrix.flag }} flag"
233256
run: >
234257
breeze build-docs ${{ inputs.docs-list-as-string }} ${{ matrix.flag }}
258+
- name: "Save docs inventory cache"
259+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
260+
with:
261+
path: ./docs/_inventory_cache/
262+
key: docs-inventory-${{ hashFiles('pyproject.toml') }}
263+
if-no-files-found: 'error'
264+
retention-days: '2'
265+
if: steps.restore-eslint-cache.outputs.cache-hit != 'true'
235266
- name: "Upload build docs"
236267
uses: actions/upload-artifact@v4
237268
with:
238269
name: airflow-docs
239270
path: './docs/_build'
240-
retention-days: 7
241-
if-no-files-found: error
271+
retention-days: '7'
272+
if-no-files-found: 'error'
242273
if: matrix.flag == '--docs-only'
243274

244275
publish-docs:

.github/workflows/ci.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,13 @@ jobs:
160160
persist-credentials: false
161161
- name: "Install Breeze"
162162
uses: ./.github/actions/breeze
163+
- name: "Save uv cache for breeze"
164+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
165+
with:
166+
key: "breeze-uv-cache-${{ inputs.python-version }}-${{ hashFiles('dev/breeze/pyproject.toml') }}"
167+
path: ~/.cache/uv/
168+
if-no-files-found: 'error'
169+
retention-days: '2'
163170
- name: "Get information about the Workflow"
164171
id: source-run-info
165172
run: breeze ci get-workflow-info 2>> ${GITHUB_OUTPUT}
@@ -171,7 +178,6 @@ jobs:
171178
PR_LABELS: "${{ steps.source-run-info.outputs.pr-labels }}"
172179
COMMIT_REF: "${{ github.sha }}"
173180
VERBOSE: "false"
174-
175181
run: breeze ci selective-check 2>> ${GITHUB_OUTPUT}
176182
- name: env
177183
run: printenv

.github/workflows/k8s-tests.yml

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,14 @@ jobs:
8585
platform: ${{ inputs.platform }}
8686
image-type: "prod"
8787
python: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}
88-
- name: "Cache bin folder with tools for kubernetes testing"
89-
uses: actions/cache@v4
88+
- name: "Restore cache folder with tools for kubernetes testing"
89+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
9090
with:
91-
path: ".build/.k8s-env"
9291
key: "\
9392
k8s-env-${{ steps.breeze.outputs.host-python-version }}-\
9493
${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','hatch_build.py') }}"
94+
path: ".build/.k8s-env"
95+
id: restore-k8s-env-cache
9596
- name: "\
9697
Run complete K8S tests ${{ matrix.executor }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}-\
9798
${{env.KUBERNETES_VERSION}}-${{ matrix.use-standard-naming }}"
@@ -100,6 +101,16 @@ jobs:
100101
EXECUTOR: ${{ matrix.executor }}
101102
USE_STANDARD_NAMING: ${{ matrix.use-standard-naming }}
102103
VERBOSE: "false"
104+
- name: "Save cache folder with tools for kubernetes testing"
105+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
106+
with:
107+
key: "\
108+
k8s-env-${{ steps.breeze.outputs.host-python-version }}-\
109+
${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','hatch_build.py') }}"
110+
path: ".build/.k8s-env"
111+
if-no-files-found: 'error'
112+
retention-days: '2'
113+
if: steps.restore-k8s-env-cache.outputs.cache-hit != 'true'
103114
- name: "\
104115
Upload KinD logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\
105116
${{ matrix.use-standard-naming }}"
@@ -110,7 +121,7 @@ jobs:
110121
kind-logs-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\
111122
${{ matrix.use-standard-naming }}"
112123
path: /tmp/kind_logs_*
113-
retention-days: 7
124+
retention-days: '7'
114125
- name: "\
115126
Upload test resource logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\
116127
${{ matrix.use-standard-naming }}"
@@ -121,7 +132,7 @@ jobs:
121132
k8s-test-resources-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\
122133
${{ matrix.use-standard-naming }}"
123134
path: /tmp/k8s_test_resources_*
124-
retention-days: 7
135+
retention-days: '7'
125136
- name: "Delete clusters just in case they are left"
126137
run: breeze k8s delete-cluster --all
127138
if: always()

0 commit comments

Comments
 (0)