Skip to content

Commit 084c9e7

Browse files
Probe fixes. Some probes now wrap their command in an `exec` handler instead of specifying the command directly, because Kubernetes rejects a probe that does not define exactly one of the supported handler types (HTTP, TCP, Exec, or gRPC).
1 parent 1ba7a69 commit 084c9e7

8 files changed

+792
-1
lines changed

chart/templates/_helpers.yaml

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,12 @@ If release name contains chart name it will be used as a full name.
327327
{{- if and .Values.dags.gitSync.containerLifecycleHooks (not .is_init) }}
328328
lifecycle: {{- tpl (toYaml .Values.dags.gitSync.containerLifecycleHooks) . | nindent 4 }}
329329
{{- end }}
330+
{{- if .Values.dags.gitSync.livenessProbe }}
331+
livenessProbe: {{- toYaml .Values.dags.gitSync.livenessProbe | nindent 4 }}
332+
{{- end }}
333+
{{- if .Values.dags.gitSync.readinessProbe }}
334+
readinessProbe: {{- toYaml .Values.dags.gitSync.readinessProbe | nindent 4 }}
335+
{{- end }}
330336
{{- end }}
331337

332338
{{/* This helper will change when customers deploy a new image */}}
@@ -794,7 +800,6 @@ server_tls_key_file = /etc/pgbouncer/server.key
794800
{{- end }}
795801
{{- end }}
796802

797-
798803
{{- define "scheduler_startup_check_command" }}
799804
{{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
800805
- sh
@@ -827,6 +832,39 @@ server_tls_key_file = /etc/pgbouncer/server.key
827832
{{- end }}
828833
{{- end }}
829834

835+
{{/*
Exec-probe command for the scheduler readiness probe.

Renders a YAML sequence (`sh -c <script>`) that callers indent beneath
`readinessProbe.exec.command`. The check is selected by .Values.airflowVersion:
  - >= 2.5.0: `airflow jobs check --job-type SchedulerJob --local`
  - >= 2.1.0: `airflow jobs check --job-type SchedulerJob --hostname $(hostname)`
  - older:    inline Python that looks up this host's most recent SchedulerJob
              row and succeeds only when that row has a heartbeat timestamp.
CONNECTION_CHECK_MAX_COUNT=0 is passed to /entrypoint in every branch.
*/}}
{{- define "scheduler_readiness_check_command" }}
  {{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
  - sh
  - -c
  - |
    CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec /entrypoint \
    airflow jobs check --job-type SchedulerJob --local
  {{- else if semverCompare ">=2.1.0" .Values.airflowVersion }}
  - sh
  - -c
  - |
    CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec /entrypoint \
    airflow jobs check --job-type SchedulerJob --hostname $(hostname)
  {{- else }}
  - sh
  - -c
  - |
    CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -Wignore -c "
    import os
    import sys
    os.environ['AIRFLOW__CORE__LOGGING_LEVEL'] = 'ERROR'
    os.environ['AIRFLOW__LOGGING__LOGGING_LEVEL'] = 'ERROR'
    from airflow.jobs.scheduler_job import SchedulerJob
    from airflow.utils.db import create_session
    from airflow.utils.net import get_hostname
    # airflow.jobs.job.most_recent_job is not available on the Airflow
    # versions this branch serves, so query the job table directly.
    with create_session() as session:
        job = session.query(SchedulerJob).filter_by(hostname=get_hostname()).order_by(
            SchedulerJob.latest_heartbeat.desc()).limit(1).first()
        # If the scheduler is running, it should have a heartbeat timestamp
        ready = job is not None and bool(job.latest_heartbeat)
    sys.exit(0 if ready else 1)
    "
  {{- end }}
{{- end }}
867+
830868
{{- define "triggerer_liveness_check_command" }}
831869
{{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
832870
- sh
@@ -843,6 +881,22 @@ server_tls_key_file = /etc/pgbouncer/server.key
843881
{{- end }}
844882
{{- end }}
845883

884+
{{/*
Exec-probe command for the triggerer readiness probe.

Renders a YAML sequence (`sh -c <script>`) running
`airflow jobs check --job-type TriggererJob` through /entrypoint.
When .Values.airflowVersion satisfies >=2.5.0 the check is scoped with
`--local`; otherwise it is scoped with `--hostname $(hostname)`.
*/}}
{{- define "triggerer_readiness_check_command" }}
  {{- $scopeFlag := "--hostname $(hostname)" }}
  {{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
    {{- $scopeFlag = "--local" }}
  {{- end }}
  - sh
  - -c
  - |
    CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec /entrypoint \
    airflow jobs check --job-type TriggererJob {{ $scopeFlag }}
{{- end }}
899+
846900
{{- define "dag_processor_liveness_check_command" }}
847901
{{- $commandArgs := (list) -}}
848902
{{- if semverCompare ">=2.5.0" .Values.airflowVersion }}

chart/templates/cleanup/cleanup-cronjob.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ spec:
110110
{{- if .Values.volumeMounts }}
111111
{{- toYaml .Values.volumeMounts | nindent 16 }}
112112
{{- end }}
113+
{{- if .Values.cleanup.livenessProbe }}
114+
livenessProbe: {{- toYaml .Values.cleanup.livenessProbe | nindent 16 }}
115+
{{- end }}
116+
{{- if .Values.cleanup.readinessProbe }}
117+
readinessProbe: {{- toYaml .Values.cleanup.readinessProbe | nindent 16 }}
118+
{{- end }}
113119
resources: {{- toYaml .Values.cleanup.resources | nindent 16 }}
114120
volumes:
115121
- name: config

chart/templates/jobs/migrate-database-job.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ spec:
120120
{{- if .Values.migrateDatabaseJob.env }}
121121
{{- tpl (toYaml .Values.migrateDatabaseJob.env) $ | nindent 12 }}
122122
{{- end }}
123+
{{- if .Values.migrateDatabaseJob.livenessProbe }}
124+
livenessProbe: {{- toYaml .Values.migrateDatabaseJob.livenessProbe | nindent 12 }}
125+
{{- end }}
126+
{{- if .Values.migrateDatabaseJob.readinessProbe }}
127+
readinessProbe: {{- toYaml .Values.migrateDatabaseJob.readinessProbe | nindent 12 }}
128+
{{- end }}
123129
resources: {{- toYaml .Values.migrateDatabaseJob.resources | nindent 12 }}
124130
volumeMounts:
125131
{{- include "airflow_config_mount" . | nindent 12 }}

chart/templates/redis/redis-statefulset.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,34 @@ spec:
101101
ports:
102102
- name: redis-db
103103
containerPort: {{ .Values.ports.redisDB }}
104+
livenessProbe:
105+
initialDelaySeconds: {{ .Values.redis.livenessProbe.initialDelaySeconds }}
106+
timeoutSeconds: {{ .Values.redis.livenessProbe.timeoutSeconds }}
107+
failureThreshold: {{ .Values.redis.livenessProbe.failureThreshold }}
108+
periodSeconds: {{ .Values.redis.livenessProbe.periodSeconds }}
109+
exec:
110+
command:
111+
{{- if .Values.redis.livenessProbe.command }}
112+
{{- toYaml .Values.redis.livenessProbe.command | nindent 16 }}
113+
{{- else }}
114+
- sh
115+
- -c
116+
- redis-cli -a $REDIS_PASSWORD ping
117+
{{- end }}
118+
readinessProbe:
119+
initialDelaySeconds: {{ .Values.redis.readinessProbe.initialDelaySeconds }}
120+
timeoutSeconds: {{ .Values.redis.readinessProbe.timeoutSeconds }}
121+
failureThreshold: {{ .Values.redis.readinessProbe.failureThreshold }}
122+
periodSeconds: {{ .Values.redis.readinessProbe.periodSeconds }}
123+
exec:
124+
command:
125+
{{- if .Values.redis.readinessProbe.command }}
126+
{{- toYaml .Values.redis.readinessProbe.command | nindent 16 }}
127+
{{- else }}
128+
- sh
129+
- -c
130+
- redis-cli -a $REDIS_PASSWORD ping
131+
{{- end }}
104132
volumeMounts:
105133
- name: redis-db
106134
mountPath: /data

chart/templates/scheduler/scheduler-deployment.yaml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,18 @@ spec:
206206
{{- else }}
207207
{{- include "scheduler_liveness_check_command" . | indent 14 }}
208208
{{- end }}
209+
readinessProbe:
210+
initialDelaySeconds: {{ .Values.scheduler.readinessProbe.initialDelaySeconds }}
211+
timeoutSeconds: {{ .Values.scheduler.readinessProbe.timeoutSeconds }}
212+
failureThreshold: {{ .Values.scheduler.readinessProbe.failureThreshold }}
213+
periodSeconds: {{ .Values.scheduler.readinessProbe.periodSeconds }}
214+
exec:
215+
command:
216+
{{- if .Values.scheduler.readinessProbe.command }}
217+
{{- toYaml .Values.scheduler.readinessProbe.command | nindent 16 }}
218+
{{- else }}
219+
{{- include "scheduler_readiness_check_command" . | indent 14 }}
220+
{{- end }}
209221
startupProbe:
210222
timeoutSeconds: {{ .Values.scheduler.startupProbe.timeoutSeconds }}
211223
failureThreshold: {{ .Values.scheduler.startupProbe.failureThreshold }}
@@ -258,6 +270,41 @@ spec:
258270
{{- if $containerLifecycleHooksLogGroomerSidecar }}
259271
lifecycle: {{- tpl (toYaml $containerLifecycleHooksLogGroomerSidecar) . | nindent 12 }}
260272
{{- end }}
273+
# Add livenessProbe
274+
{{- if hasKey .Values.scheduler.logGroomerSidecar "livenessProbe" }}
275+
livenessProbe:
276+
initialDelaySeconds: {{ .Values.scheduler.logGroomerSidecar.livenessProbe.initialDelaySeconds }}
277+
timeoutSeconds: {{ .Values.scheduler.logGroomerSidecar.livenessProbe.timeoutSeconds }}
278+
failureThreshold: {{ .Values.scheduler.logGroomerSidecar.livenessProbe.failureThreshold }}
279+
periodSeconds: {{ .Values.scheduler.logGroomerSidecar.livenessProbe.periodSeconds }}
280+
exec:
281+
command:
282+
{{- if hasKey .Values.scheduler.logGroomerSidecar.livenessProbe "command" }}
283+
{{- toYaml .Values.scheduler.logGroomerSidecar.livenessProbe.command | nindent 16 }}
284+
{{- else }}
285+
- /bin/sh
286+
- -c
287+
- ps aux | grep airflow | grep -v grep
288+
{{- end }}
289+
{{- end }}
290+
291+
# Add readinessProbe
292+
{{- if hasKey .Values.scheduler.logGroomerSidecar "readinessProbe" }}
293+
readinessProbe:
294+
initialDelaySeconds: {{ .Values.scheduler.logGroomerSidecar.readinessProbe.initialDelaySeconds }}
295+
timeoutSeconds: {{ .Values.scheduler.logGroomerSidecar.readinessProbe.timeoutSeconds }}
296+
failureThreshold: {{ .Values.scheduler.logGroomerSidecar.readinessProbe.failureThreshold }}
297+
periodSeconds: {{ .Values.scheduler.logGroomerSidecar.readinessProbe.periodSeconds }}
298+
exec:
299+
command:
300+
{{- if hasKey .Values.scheduler.logGroomerSidecar.readinessProbe "command" }}
301+
{{- toYaml .Values.scheduler.logGroomerSidecar.readinessProbe.command | nindent 16 }}
302+
{{- else }}
303+
- /bin/sh
304+
- -c
305+
- test -d {{ template "airflow_logs" . }}
306+
{{- end }}
307+
{{- end }}
261308
{{- if .Values.scheduler.logGroomerSidecar.command }}
262309
command: {{ tpl (toYaml .Values.scheduler.logGroomerSidecar.command) . | nindent 12 }}
263310
{{- end }}

chart/templates/triggerer/triggerer-deployment.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,18 @@ spec:
216216
{{- else }}
217217
{{- include "triggerer_liveness_check_command" . | indent 14 }}
218218
{{- end }}
219+
readinessProbe:
220+
initialDelaySeconds: {{ .Values.triggerer.readinessProbe.initialDelaySeconds }}
221+
timeoutSeconds: {{ .Values.triggerer.readinessProbe.timeoutSeconds }}
222+
failureThreshold: {{ .Values.triggerer.readinessProbe.failureThreshold }}
223+
periodSeconds: {{ .Values.triggerer.readinessProbe.periodSeconds }}
224+
exec:
225+
command:
226+
{{- if .Values.triggerer.readinessProbe.command }}
227+
{{- toYaml .Values.triggerer.readinessProbe.command | nindent 16 }}
228+
{{- else }}
229+
{{- include "triggerer_readiness_check_command" . | indent 14 }}
230+
{{- end }}
219231
{{- /* Airflow version 2.6.0 is when triggerer logs serve introduced */ -}}
220232
{{- if semverCompare ">=2.6.0" .Values.airflowVersion }}
221233
ports:
@@ -234,6 +246,38 @@ spec:
234246
{{- if $containerLifecycleHooksLogGroomerSidecar }}
235247
lifecycle: {{- tpl (toYaml $containerLifecycleHooksLogGroomerSidecar) . | nindent 12 }}
236248
{{- end }}
249+
{{- if hasKey .Values.triggerer.logGroomerSidecar "livenessProbe" }}
250+
livenessProbe:
251+
initialDelaySeconds: {{ .Values.triggerer.logGroomerSidecar.livenessProbe.initialDelaySeconds }}
252+
timeoutSeconds: {{ .Values.triggerer.logGroomerSidecar.livenessProbe.timeoutSeconds }}
253+
failureThreshold: {{ .Values.triggerer.logGroomerSidecar.livenessProbe.failureThreshold }}
254+
periodSeconds: {{ .Values.triggerer.logGroomerSidecar.livenessProbe.periodSeconds }}
255+
exec:
256+
command:
257+
{{- if hasKey .Values.triggerer.logGroomerSidecar.livenessProbe "command" }}
258+
{{- toYaml .Values.triggerer.logGroomerSidecar.livenessProbe.command | nindent 16 }}
259+
{{- else }}
260+
- bash
261+
- -c
262+
- airflow jobs check --job-type TriggererJob
263+
{{- end }}
264+
{{- end }}
265+
{{- if hasKey .Values.triggerer.logGroomerSidecar "readinessProbe" }}
266+
readinessProbe:
267+
initialDelaySeconds: {{ .Values.triggerer.logGroomerSidecar.readinessProbe.initialDelaySeconds }}
268+
timeoutSeconds: {{ .Values.triggerer.logGroomerSidecar.readinessProbe.timeoutSeconds }}
269+
failureThreshold: {{ .Values.triggerer.logGroomerSidecar.readinessProbe.failureThreshold }}
270+
periodSeconds: {{ .Values.triggerer.logGroomerSidecar.readinessProbe.periodSeconds }}
271+
exec:
272+
command:
273+
{{- if hasKey .Values.triggerer.logGroomerSidecar.readinessProbe "command" }}
274+
{{- toYaml .Values.triggerer.logGroomerSidecar.readinessProbe.command | nindent 16 }}
275+
{{- else }}
276+
- /bin/sh
277+
- -c
278+
- airflow jobs check --job-type TriggererJob
279+
{{- end }}
280+
{{- end }}
237281
{{- if .Values.triggerer.logGroomerSidecar.command }}
238282
command: {{ tpl (toYaml .Values.triggerer.logGroomerSidecar.command) . | nindent 12 }}
239283
{{- end }}

chart/templates/workers/worker-deployment.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,22 @@ spec:
259259
- CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -m celery --app {{ include "celery_executor_namespace" . }} inspect ping -d celery@$(hostname)
260260
{{- end }}
261261
{{- end }}
262+
{{- if .Values.workers.readinessProbe.enabled }}
263+
readinessProbe:
264+
initialDelaySeconds: {{ .Values.workers.readinessProbe.initialDelaySeconds }}
265+
timeoutSeconds: {{ .Values.workers.readinessProbe.timeoutSeconds }}
266+
failureThreshold: {{ .Values.workers.readinessProbe.failureThreshold }}
267+
periodSeconds: {{ .Values.workers.readinessProbe.periodSeconds }}
268+
exec:
269+
command:
270+
{{- if .Values.workers.readinessProbe.command }}
271+
{{- toYaml .Values.workers.readinessProbe.command | nindent 16 }}
272+
{{- else }}
273+
- sh
274+
- -c
275+
- CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -m celery --app {{ include "celery_executor_namespace" . }} inspect ping -d celery@$(hostname)
276+
{{- end }}
277+
{{- end }}
262278
ports:
263279
- name: worker-logs
264280
containerPort: {{ .Values.ports.workerLogs }}

0 commit comments

Comments
 (0)