
Commit 7336340

Bump to Airflow 1.10.4 and Python 3.7

1 parent: c0edbbf

File tree

7 files changed: +81 -30 lines

  .circleci/config.yml
  Dockerfile
  README.md
  config/airflow.cfg
  docker-compose-CeleryExecutor.yml
  docker-compose-LocalExecutor.yml
  script/entrypoint.sh

.circleci/config.yml

Lines changed: 7 additions & 3 deletions

@@ -9,14 +9,18 @@ jobs:
       - checkout
       - setup_remote_docker:
           docker_layer_caching: true
-      - run:
+      - run:
           name: Build docker image
           command: |
             docker build -t puckel/docker-airflow .
-      - run:
+      - run:
+          name: Test Python version
+          command: |
+            docker run puckel/docker-airflow python -V | grep '3.7'
+      - run:
           name: Test docker image
           command: |
-            docker run puckel/docker-airflow version |grep '1.10.3'
+            docker run puckel/docker-airflow version |grep '1.10.4'
 workflows:
   version: 2
   build_and_test:
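
The two grep checks above can be reproduced outside CI. A minimal local equivalent, assuming Docker is available and using the same image tag as the config:

    # Build the image, then assert the interpreter and Airflow versions baked into it
    docker build -t puckel/docker-airflow .
    docker run puckel/docker-airflow python -V | grep '3.7'
    docker run puckel/docker-airflow version | grep '1.10.4'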

Dockerfile

Lines changed: 3 additions & 3 deletions

@@ -1,18 +1,18 @@
-# VERSION 1.10.3
+# VERSION 1.10.4
 # AUTHOR: Matthieu "Puckel_" Roisil
 # DESCRIPTION: Basic Airflow container
 # BUILD: docker build --rm -t puckel/docker-airflow .
 # SOURCE: https://github.com/puckel/docker-airflow

-FROM python:3.6-slim-stretch
+FROM python:3.7-slim-stretch
 LABEL maintainer="Puckel_"

 # Never prompts the user for choices on installation/configuration of packages
 ENV DEBIAN_FRONTEND noninteractive
 ENV TERM linux

 # Airflow
-ARG AIRFLOW_VERSION=1.10.3
+ARG AIRFLOW_VERSION=1.10.4
 ARG AIRFLOW_USER_HOME=/usr/local/airflow
 ARG AIRFLOW_DEPS=""
 ARG PYTHON_DEPS=""
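
Because the Airflow version and the optional dependency lists are declared as build arguments, they can be overridden at build time. A sketch, where the --build-arg names come from the Dockerfile above and the extras/package values are examples only:

    # Override the ARGs declared in the Dockerfile; values shown are illustrative
    docker build --rm \
      --build-arg AIRFLOW_VERSION=1.10.4 \
      --build-arg AIRFLOW_DEPS="gcp_api,statsd" \
      --build-arg PYTHON_DEPS="SQLAlchemy==1.3.8" \
      -t puckel/docker-airflow .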

README.md

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a

 ## Informations

-* Based on Python (3.6-slim-stretch) official Image [python:3.6-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue
+* Based on Python (3.7-slim-stretch) official Image [python:3.7-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue
 * Install [Docker](https://www.docker.com/)
 * Install [Docker Compose](https://docs.docker.com/compose/install/)
 * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow)

config/airflow.cfg

Lines changed: 61 additions & 16 deletions

@@ -28,7 +28,11 @@ fab_logging_level = WARN
 logging_config_class =

 # Log format
-# we need to escape the curly braces by adding an additional curly brace
+# Colour the logs when the controlling terminal is a TTY.
+colored_console_log = True
+colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
+colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
+
 log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
 simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
@@ -54,16 +58,26 @@ executor = SequentialExecutor
 # their website
 # sql_alchemy_conn = sqlite:////tmp/airflow.db

-# If SqlAlchemy should pool database connections.
-sql_alchemy_pool_enabled = True
-
 # The encoding for the databases
 sql_engine_encoding = utf-8

+# If SqlAlchemy should pool database connections.
+sql_alchemy_pool_enabled = True
+
 # The SqlAlchemy pool size is the maximum number of database connections
 # in the pool. 0 indicates no limit.
 sql_alchemy_pool_size = 5

+# The maximum overflow size of the pool.
+# When the number of checked-out connections reaches the size set in pool_size,
+# additional connections will be returned up to this limit.
+# When those additional connections are returned to the pool, they are disconnected and discarded.
+# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
+# and the total number of "sleeping" connections the pool will allow is pool_size.
+# max_overflow can be set to -1 to indicate no overflow limit;
+# no limit will be placed on the total number of concurrent connections. Defaults to 10.
+sql_alchemy_max_overflow = 10
+
 # The SqlAlchemy pool recycle is the number of seconds a connection
 # can be idle in the pool before it is invalidated. This config does
 # not apply to sqlite. If the number of DB connections is ever exceeded,
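
As the new comment spells out, the connection ceiling is pool_size + max_overflow, so the defaults above allow at most 5 + 10 = 15 simultaneous connections. A sketch of overriding both values through the AIRFLOW__<SECTION>__<KEY> environment convention noted at the end of this file (variable names assume the keys stay in the [core] section):

    # Raise the SQLAlchemy pool ceiling without editing airflow.cfg
    export AIRFLOW__CORE__SQL_ALCHEMY_POOL_SIZE=10
    export AIRFLOW__CORE__SQL_ALCHEMY_MAX_OVERFLOW=20   # ceiling becomes 10 + 20 = 30
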
@@ -182,7 +196,7 @@ password =
 [operators]
 # The default owner assigned to each new operator, unless
 # provided explicitly or passed via `default_args`
-default_owner = Airflow
+default_owner = airflow
 default_cpus = 1
 default_ram = 512
 default_disk = 512
@@ -191,9 +205,6 @@ default_gpus = 0
 [hive]
 # Default mapreduce queue for HiveOperator tasks
 default_hive_mapred_queue =
-# Template for mapred_job_name in HiveOperator, supports the following named parameters:
-# hostname, dag_id, task_id, execution_date
-mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date}

 [webserver]
 # The base url of your website as airflow cannot guess what domain or
@@ -301,6 +312,9 @@ cookie_secure = False
 # Set samesite policy on session cookie
 cookie_samesite =

+# Default setting for wrap toggle on DAG code and TI log views.
+default_wrap = False
+
 [email]
 email_backend = airflow.utils.email.send_email_smtp

@@ -391,6 +405,13 @@ ssl_key =
 ssl_cert =
 ssl_cacert =

+# Celery Pool implementation.
+# Choices include: prefork (default), eventlet, gevent or solo.
+# See:
+#   https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
+#   https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
+pool = prefork
+
 [celery_broker_transport_options]
 # This section is for specifying options which can be passed to the
 # underlying celery broker transport. See:
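
The new pool option sits in the [celery] section, so it can be switched per environment rather than by editing the file. A sketch, assuming the chosen pool's package (eventlet or gevent) is installed in the image:

    # Switch the Celery worker pool implementation from the prefork default
    export AIRFLOW__CELERY__POOL=eventlet
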
@@ -505,8 +526,8 @@ basedn = dc=example,dc=com
 cacert = /etc/ca/ldap_ca.crt
 search_scope = LEVEL

-# This setting allows the use of LDAP servers that either return a
-# broken schema, or do not return a schema.
+# This setting allows the use of LDAP servers that either return a
+# broken schema, or do not return a schema.
 ignore_malformed_schema = False

 [mesos]
@@ -567,10 +588,22 @@ api_rev = v3
 hide_sensitive_variable_fields = True

 [elasticsearch]
-elasticsearch_host =
-# we need to escape the curly braces by adding an additional curly brace
-elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
-elasticsearch_end_of_log_mark = end_of_log
+# Elasticsearch host
+host =
+# Format of the log_id, which is used to query for a given tasks logs
+log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
+# Used to mark the end of a log stream for a task
+end_of_log_mark = end_of_log
+# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
+# Code will construct log_id using the log_id template from the argument above.
+# NOTE: The code will prefix the https:// automatically, don't include that here.
+frontend =
+# Write the task logs to the stdout of the worker, rather than the default files
+write_stdout = False
+# Instead of the default log formatter, write the log lines as JSON
+json_format = False
+# Log fields to also attach to the json output, if enabled
+json_fields = asctime, filename, lineno, levelname, message

 [kubernetes]
 # The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
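
Note that the keys lose their elasticsearch_ prefix (elasticsearch_host becomes host, and so on), which also changes the matching environment variables. A sketch of the new-style overrides, with a placeholder host value:

    # New-style [elasticsearch] overrides after the key rename; host value is a placeholder
    export AIRFLOW__ELASTICSEARCH__HOST="elasticsearch.example.com:9200"
    export AIRFLOW__ELASTICSEARCH__WRITE_STDOUT=True
    export AIRFLOW__ELASTICSEARCH__JSON_FORMAT=True
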
@@ -606,7 +639,6 @@ logs_volume_subpath =
 # A shared volume claim for the logs
 logs_volume_claim =

-
 # For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
 # Useful in local environment, discouraged in production
 dags_volume_host =
@@ -634,7 +666,7 @@ git_password =
 git_sync_root = /git
 git_sync_dest = repo
 # Mount point of the volume if git-sync is being used.
-# i.e. /root/airflow/dags
+# i.e. {AIRFLOW_HOME}/dags
 git_dags_folder_mount_point =

 # To get Git-sync SSH authentication set up follow this format
@@ -705,6 +737,13 @@ affinity =
 # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
 tolerations =

+# **kwargs parameters to pass while calling a kubernetes client core_v1_api methods from Kubernetes Executor
+# provided as a single line formatted JSON dictionary string.
+# List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
+# See:
+#   https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
+kube_client_request_args =
+
 # Worker pods security context options
 # See:
 # https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
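
The new kube_client_request_args key expects a single-line JSON dictionary. A sketch of setting it through the environment, using _request_timeout, one keyword argument the kubernetes Python client's core_v1_api methods accept (the value shown is illustrative, not a recommendation):

    # Illustrative only: pass a (connect, read) timeout to every core_v1_api call the executor makes
    export AIRFLOW__KUBERNETES__KUBE_CLIENT_REQUEST_ARGS='{"_request_timeout": [60, 60]}'
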
@@ -753,3 +792,9 @@ fs_group =
 #
 # Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
 # formatting as supported by airflow normally.
+
+[kubernetes_labels]
+# The Key-value pairs to be given to worker pods.
+# The worker pods will be given these static labels, as well as some additional dynamic labels
+# to identify the task.
+# Should be supplied in the format: key = value

docker-compose-CeleryExecutor.yml

Lines changed: 5 additions & 5 deletions

@@ -1,7 +1,7 @@
 version: '2.1'
 services:
     redis:
-        image: 'redis:3.2.7'
+        image: 'redis:5.0.5'
         # command: redis-server --requirepass redispass

     postgres:
@@ -16,7 +16,7 @@ services:
         #     - ./pgdata:/var/lib/postgresql/data/pgdata

     webserver:
-        image: puckel/docker-airflow:1.10.3
+        image: puckel/docker-airflow:1.10.4
         restart: always
         depends_on:
             - postgres
@@ -43,7 +43,7 @@ services:
             retries: 3

     flower:
-        image: puckel/docker-airflow:1.10.3
+        image: puckel/docker-airflow:1.10.4
         restart: always
         depends_on:
             - redis
@@ -55,7 +55,7 @@ services:
         command: flower

     scheduler:
-        image: puckel/docker-airflow:1.10.3
+        image: puckel/docker-airflow:1.10.4
         restart: always
         depends_on:
             - webserver
@@ -74,7 +74,7 @@ services:
         command: scheduler

     worker:
-        image: puckel/docker-airflow:1.10.3
+        image: puckel/docker-airflow:1.10.4
         restart: always
         depends_on:
             - scheduler
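
To pick up the new 1.10.4 image tags and the redis 5.0.5 service, the Celery stack can be recreated with the updated compose file; a minimal sketch, using the service definitions above:

    # Pull the updated images and restart the Celery stack in the background
    docker-compose -f docker-compose-CeleryExecutor.yml pull
    docker-compose -f docker-compose-CeleryExecutor.yml up -d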

docker-compose-LocalExecutor.yml

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@ services:
             - POSTGRES_DB=airflow

     webserver:
-        image: puckel/docker-airflow:1.10.3
+        image: puckel/docker-airflow:1.10.4
         restart: always
         depends_on:
             - postgres

script/entrypoint.sh

Lines changed: 3 additions & 1 deletion

@@ -13,10 +13,12 @@ TRY_LOOP="20"
 : "${POSTGRES_DB:="airflow"}"

 # Defaults and back-compat
+: "${AIRFLOW_HOME:="/usr/local/airflow"}"
 : "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
 : "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"

 export \
+  AIRFLOW_HOME \
   AIRFLOW__CELERY__BROKER_URL \
   AIRFLOW__CELERY__RESULT_BACKEND \
   AIRFLOW__CORE__EXECUTOR \
@@ -33,7 +35,7 @@ fi

 # Install custom python package if requirements.txt is present
 if [ -e "/requirements.txt" ]; then
-  $(which pip) install --user -r /requirements.txt
+  $(command -v pip) install --user -r /requirements.txt
 fi

 if [ -n "$REDIS_PASSWORD" ]; then
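
Two shell idioms carry this change: `: "${VAR:="default"}"` assigns a default only when the variable is unset or empty, and `command -v` is the POSIX replacement for the external `which`. A standalone sketch of both patterns:

    # Default assignment: keeps an existing AIRFLOW_HOME, otherwise sets the image default
    : "${AIRFLOW_HOME:="/usr/local/airflow"}"
    echo "AIRFLOW_HOME=$AIRFLOW_HOME"

    # POSIX way to locate pip before invoking it
    "$(command -v pip)" --version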
