Skip to content

Commit ed286c5

Browse files
committed
Improve caching strategy across the board of CI workflow
We use various caches in our build, and so far - due to the way "standard" caching works - PRs from forks could not effectively use the cache from the main Airflow repository, because caches are not shared with other repositories. As a result, PR builds could only use the cache effectively when they were rebased and continued running from the same fork. This PR improves the caching strategy by using the "stash" action from the ASF. Unlike `cache`, the stash action uses artifacts to store the cache, which makes it possible for PRs coming from forks to use a cache uploaded from `main` canary builds. As part of this change, all the places where setup-python was used and breeze was installed afterwards were reviewed and updated to use only the breeze installation action (it already installs Python), and this action has been improved to use uv caching effectively. Overall, this PR should decrease setup overhead for many jobs across the CI workflow. Follow-up after #45266
1 parent 52ed7d7 commit ed286c5

File tree

16 files changed

+291
-128
lines changed

16 files changed

+291
-128
lines changed

.github/actions/breeze/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ runs:
3333
uses: actions/setup-python@v5
3434
with:
3535
python-version: ${{ inputs.python-version }}
36-
cache: 'pip'
37-
cache-dependency-path: ./dev/breeze/pyproject.toml
36+
# NOTE! Installing Breeze without using cache is FASTER than when using cache - uv is so fast and has
37+
# such low overhead that just running upload cache/restore cache is slower than installing it from scratch
3838
- name: "Install Breeze"
3939
shell: bash
4040
run: ./scripts/ci/install_breeze.sh

.github/actions/install-pre-commit/action.yml

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,19 @@
1919
name: 'Install pre-commit'
2020
description: 'Installs pre-commit and related packages'
2121
inputs:
22+
# TODO(potiuk): automate update of these versions
2223
python-version:
2324
description: 'Python version to use'
24-
default: 3.9
25+
default: "3.9"
2526
uv-version:
2627
description: 'uv version to use'
27-
default: 0.5.5
28+
default: "0.5.13"
2829
pre-commit-version:
2930
description: 'pre-commit version to use'
30-
default: 4.0.1
31+
default: "4.0.1"
3132
pre-commit-uv-version:
3233
description: 'pre-commit-uv version to use'
33-
default: 4.1.4
34+
default: "4.1.4"
3435
runs:
3536
using: "composite"
3637
steps:
@@ -40,10 +41,35 @@ runs:
4041
pip install uv==${{inputs.uv-version}} || true
4142
uv tool install pre-commit==${{inputs.pre-commit-version}} --with uv==${{inputs.uv-version}} \
4243
--with pre-commit-uv==${{inputs.pre-commit-uv-version}}
43-
- name: Cache pre-commit envs
44-
uses: actions/cache@v4
44+
working-directory: ${{ github.workspace }}
45+
# We need to use a tar archive to restore all the permissions and symlinks
46+
- name: "Delete ~/.cache"
47+
run: |
48+
du ~/ --max-depth=2
49+
echo
50+
echo Deleting ~/.cache
51+
echo
52+
rm -rf ~/.cache
53+
echo
54+
shell: bash
55+
- name: "Restore pre-commit cache"
56+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
4557
with:
46-
path: ~/.cache/pre-commit
47-
key: "pre-commit-${{inputs.python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}"
48-
restore-keys: |
49-
pre-commit-${{inputs.python-version}}-
58+
key: cache-pre-commit-v4-${{ inputs.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}
59+
path: /tmp/
60+
id: restore-pre-commit-cache
61+
- name: "Restore .cache from the tar file"
62+
run: tar -C ~ -xzf /tmp/cache-pre-commit.tar.gz
63+
shell: bash
64+
if: steps.restore-pre-commit-cache.outputs.stash-hit == 'true'
65+
- name: "Show restored files"
66+
run: |
67+
echo "Restored files"
68+
du ~/ --max-depth=2
69+
echo
70+
shell: bash
71+
if: steps.restore-pre-commit-cache.outputs.stash-hit == 'true'
72+
- name: Install pre-commit hooks
73+
shell: bash
74+
run: pre-commit install-hooks || (cat ~/.cache/pre-commit/pre-commit.log && exit 1)
75+
working-directory: ${{ github.workspace }}

.github/actions/prepare_breeze_and_image/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ runs:
4444
- name: "Restore ${{ inputs.image-type }} docker image ${{ inputs.platform }}:${{ inputs.python }}"
4545
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
4646
with:
47-
key: "${{ inputs.image-type }}-image-save-${{ inputs.platform }}-${{ inputs.python }}"
47+
key: ${{ inputs.image-type }}-image-save-${{ inputs.platform }}-${{ inputs.python }}
4848
path: "/tmp/"
4949
- name: "Load ${{ inputs.image-type }} image ${{ inputs.platform }}:${{ inputs.python }}"
5050
run: >

.github/actions/prepare_single_ci_image/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ runs:
3838
- name: "Restore CI docker images ${{ inputs.platform }}:${{ inputs.python }}"
3939
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
4040
with:
41-
key: "ci-image-save-${{ inputs.platform }}-${{ inputs.python }}"
41+
key: ci-image-save-${{ inputs.platform }}-${{ inputs.python }}
4242
path: "/tmp/"
4343
if: contains(inputs.python-versions-list-as-string, inputs.python)
4444
- name: "Load CI image ${{ inputs.platform }}:${{ inputs.python }}"

.github/workflows/basic-tests.yml

Lines changed: 78 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,11 @@ jobs:
7272
persist-credentials: false
7373
- name: "Cleanup docker"
7474
run: ./scripts/ci/cleanup_docker.sh
75-
- uses: actions/setup-python@v5
76-
with:
77-
python-version: "${{ inputs.default-python-version }}"
78-
cache: 'pip'
79-
cache-dependency-path: ./dev/breeze/pyproject.toml
80-
- run: pip install --editable ./dev/breeze/
81-
- run: python -m pytest -n auto --color=yes
75+
- name: "Install Breeze"
76+
uses: ./.github/actions/breeze
77+
- run: uv tool run --from apache-airflow-breeze pytest -n auto --color=yes
8278
working-directory: ./dev/breeze/
8379

84-
8580
tests-ui:
8681
timeout-minutes: 10
8782
name: React UI tests
@@ -108,15 +103,24 @@ jobs:
108103
node-version: 21
109104
cache: 'pnpm'
110105
cache-dependency-path: 'airflow/ui/pnpm-lock.yaml'
111-
- name: "Cache eslint"
112-
uses: actions/cache@v4
106+
- name: "Restore eslint cache (ui)"
107+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
113108
with:
114-
path: 'airflow/ui/node_modules'
115-
key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }}
109+
path: airflow/ui/node_modules/
110+
key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }}
111+
id: restore-eslint-cache
116112
- run: cd airflow/ui && pnpm install --frozen-lockfile
117113
- run: cd airflow/ui && pnpm test
118114
env:
119115
FORCE_COLOR: 2
116+
- name: "Save eslint cache (ui)"
117+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
118+
with:
119+
path: airflow/ui/node_modules/
120+
key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }}
121+
if-no-files-found: 'error'
122+
retention-days: '2'
123+
if: steps.restore-eslint-cache.outputs.stash-hit != 'true'
120124

121125
tests-www:
122126
timeout-minutes: 10
@@ -137,15 +141,64 @@ jobs:
137141
uses: actions/setup-node@v4
138142
with:
139143
node-version: 21
140-
- name: "Cache eslint"
141-
uses: actions/cache@v4
144+
- name: "Restore eslint cache (www)"
145+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
142146
with:
143-
path: 'airflow/www/node_modules'
144-
key: ${{ runner.os }}-www-node-modules-${{ hashFiles('airflow/www/**/yarn.lock') }}
147+
path: airflow/www/node_modules/
148+
key: cache-www-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/www/**/yarn.lock') }}
149+
id: restore-eslint-cache
145150
- run: yarn --cwd airflow/www/ install --frozen-lockfile --non-interactive
146151
- run: yarn --cwd airflow/www/ run test
147152
env:
148153
FORCE_COLOR: 2
154+
- name: "Save eslint cache (www)"
155+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
156+
with:
157+
path: airflow/www/node_modules/
158+
key: cache-www-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/www/**/yarn.lock') }}
159+
if-no-files-found: 'error'
160+
retention-days: '2'
161+
if: steps.restore-eslint-cache.outputs.stash-hit != 'true'
162+
163+
install-pre-commit:
164+
timeout-minutes: 5
165+
name: "Install pre-commit for cache"
166+
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
167+
env:
168+
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
169+
if: inputs.basic-checks-only == 'true'
170+
steps:
171+
- name: "Cleanup repo"
172+
shell: bash
173+
run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
174+
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
175+
uses: actions/checkout@v4
176+
with:
177+
persist-credentials: false
178+
- name: "Install Breeze"
179+
uses: ./.github/actions/breeze
180+
id: breeze
181+
- name: "Install pre-commit"
182+
uses: ./.github/actions/install-pre-commit
183+
id: pre-commit
184+
with:
185+
python-version: ${{steps.breeze.outputs.host-python-version}}
186+
- name: "Prepare .tar file from pre-commit cache"
187+
run: |
188+
tar -C ~ -czf /tmp/cache-pre-commit.tar.gz .cache/pre-commit .cache/uv
189+
shell: bash
190+
# Saving pre-commit cache should happen only in one job in the entire workflow - because otherwise
191+
# it will cause 409 conflict errors - see https://github.com/actions/upload-artifact/issues/478
192+
# the way it works with airflow - even if the same job is in "ci-image-checks" the if condition
193+
# above `if: inputs.basic-checks-only == 'true'` will prevent it from running in the other job
194+
- name: "Save pre-commit[pre-commit] cache"
195+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
196+
with:
197+
# yamllint disable rule:line-length
198+
key: cache-pre-commit-v4-${{ steps.breeze.outputs.host-python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}
199+
path: /tmp/cache-pre-commit.tar.gz
200+
if-no-files-found: 'error'
201+
retention-days: '2'
149202

150203
# Those checks are run if no image needs to be built for checks. This is for simple changes that
151204
# Do not touch any of the python code or any of the important files that might require building
@@ -154,6 +207,7 @@ jobs:
154207
timeout-minutes: 30
155208
name: "Static checks: basic checks only"
156209
runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }}
210+
needs: install-pre-commit
157211
if: inputs.basic-checks-only == 'true'
158212
steps:
159213
- name: "Cleanup repo"
@@ -165,18 +219,6 @@ jobs:
165219
persist-credentials: false
166220
- name: "Cleanup docker"
167221
run: ./scripts/ci/cleanup_docker.sh
168-
- name: "Setup python"
169-
uses: actions/setup-python@v5
170-
with:
171-
python-version: ${{ inputs.default-python-version }}
172-
cache: 'pip'
173-
cache-dependency-path: ./dev/breeze/pyproject.toml
174-
- name: "Setup python"
175-
uses: actions/setup-python@v5
176-
with:
177-
python-version: "${{ inputs.default-python-version }}"
178-
cache: 'pip'
179-
cache-dependency-path: ./dev/breeze/pyproject.toml
180222
- name: "Install Breeze"
181223
uses: ./.github/actions/breeze
182224
id: breeze
@@ -216,6 +258,7 @@ jobs:
216258
timeout-minutes: 45
217259
name: "Upgrade checks"
218260
runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }}
261+
needs: install-pre-commit
219262
env:
220263
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
221264
if: inputs.canary-run == 'true' && inputs.latest-versions-only != 'true'
@@ -229,12 +272,14 @@ jobs:
229272
persist-credentials: false
230273
- name: "Cleanup docker"
231274
run: ./scripts/ci/cleanup_docker.sh
232-
# Install python from scratch. No cache used. We always want to have fresh version of everything
233-
- uses: actions/setup-python@v5
275+
- name: "Install Breeze"
276+
uses: ./.github/actions/breeze
277+
id: breeze
278+
- name: "Install pre-commit"
279+
uses: ./.github/actions/install-pre-commit
280+
id: pre-commit
234281
with:
235-
python-version: "${{ inputs.default-python-version }}"
236-
- name: "Install latest pre-commit"
237-
run: pip install pre-commit
282+
python-version: ${{steps.breeze.outputs.host-python-version}}
238283
- name: "Autoupdate all pre-commits"
239284
run: pre-commit autoupdate
240285
- name: "Run automated upgrade for black"

.github/workflows/ci-image-build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ jobs:
159159
- name: "Stash CI docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}"
160160
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
161161
with:
162-
key: "ci-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}"
162+
key: ci-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}
163163
path: "/tmp/ci-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar"
164164
if-no-files-found: 'error'
165-
retention-days: 2
165+
retention-days: '2'
166166
if: inputs.upload-image-artifact == 'true'

.github/workflows/ci-image-checks.yml

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,51 @@ on: # yamllint disable-line rule:truthy
106106
type: string
107107

108108
jobs:
109+
install-pre-commit:
110+
timeout-minutes: 5
111+
name: "Install pre-commit for cache"
112+
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
113+
env:
114+
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
115+
if: inputs.basic-checks-only == 'false'
116+
steps:
117+
- name: "Cleanup repo"
118+
shell: bash
119+
run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
120+
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
121+
uses: actions/checkout@v4
122+
with:
123+
persist-credentials: false
124+
- name: "Install Breeze"
125+
uses: ./.github/actions/breeze
126+
id: breeze
127+
- name: "Install pre-commit"
128+
uses: ./.github/actions/install-pre-commit
129+
id: pre-commit
130+
with:
131+
python-version: ${{steps.breeze.outputs.host-python-version}}
132+
- name: "Prepare .tar file from pre-commit cache"
133+
run: |
134+
tar -C ~ -czf /tmp/cache-pre-commit.tar.gz .cache/pre-commit .cache/uv
135+
shell: bash
136+
# Saving pre-commit cache should happen only in one job in the entire workflow - because otherwise
137+
# it will cause 409 conflict errors - see https://github.com/actions/upload-artifact/issues/478
138+
# the way it works with airflow - even if the same action is in "basic-tests" the if condition
139+
# above `if: inputs.basic-checks-only == 'false'` will prevent it from running in the other job
140+
- name: "Save pre-commit cache"
141+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
142+
with:
143+
# yamllint disable rule:line-length
144+
key: cache-pre-commit-v4-${{ steps.breeze.outputs.host-python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}
145+
path: /tmp/cache-pre-commit.tar.gz
146+
if-no-files-found: 'error'
147+
retention-days: '2'
148+
109149
static-checks:
110150
timeout-minutes: 45
111151
name: "Static checks"
112152
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
153+
needs: install-pre-commit
113154
env:
114155
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
115156
UPGRADE_TO_NEWER_DEPENDENCIES: "${{ inputs.upgrade-to-newer-dependencies }}"
@@ -123,12 +164,6 @@ jobs:
123164
uses: actions/checkout@v4
124165
with:
125166
persist-credentials: false
126-
- name: "Setup python"
127-
uses: actions/setup-python@v5
128-
with:
129-
python-version: ${{ inputs.default-python-version }}
130-
cache: 'pip'
131-
cache-dependency-path: ./dev/breeze/pyproject.toml
132167
- name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}"
133168
uses: ./.github/actions/prepare_breeze_and_image
134169
with:
@@ -154,6 +189,7 @@ jobs:
154189
timeout-minutes: 45
155190
name: "MyPy checks"
156191
runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }}
192+
needs: install-pre-commit
157193
if: inputs.needs-mypy == 'true'
158194
strategy:
159195
fail-fast: false
@@ -221,24 +257,31 @@ jobs:
221257
with:
222258
platform: "linux/amd64"
223259
python: ${{ inputs.default-python-version }}
224-
- uses: actions/cache@v4
225-
id: cache-doc-inventories
260+
- name: "Restore docs inventory cache"
261+
uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
226262
with:
227263
path: ./docs/_inventory_cache/
228-
key: docs-inventory-${{ hashFiles('pyproject.toml;') }}
229-
restore-keys: |
230-
docs-inventory-${{ hashFiles('pyproject.toml;') }}
231-
docs-inventory-
264+
# TODO(potiuk): do better with determining the key
265+
key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }}
266+
id: restore-docs-inventory-cache
232267
- name: "Building docs with ${{ matrix.flag }} flag"
233268
run: >
234269
breeze build-docs ${{ inputs.docs-list-as-string }} ${{ matrix.flag }}
270+
- name: "Save docs inventory cache"
271+
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c
272+
with:
273+
path: ./docs/_inventory_cache/
274+
key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }}
275+
if-no-files-found: 'error'
276+
retention-days: '2'
277+
# Save only on a cache miss: compare the restore step's `stash-hit` output, not the step object itself
if: steps.restore-docs-inventory-cache.outputs.stash-hit != 'true'
235278
- name: "Upload build docs"
236279
uses: actions/upload-artifact@v4
237280
with:
238281
name: airflow-docs
239282
path: './docs/_build'
240-
retention-days: 7
241-
if-no-files-found: error
283+
retention-days: '7'
284+
if-no-files-found: 'error'
242285
if: matrix.flag == '--docs-only'
243286

244287
publish-docs:

0 commit comments

Comments
 (0)