diff --git a/Dockerfile b/Dockerfile
index cfe7fb6a..b46c3a9d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# VERSION 1.10.0-5
+# VERSION 1.10.1
 # AUTHOR: Matthieu "Puckel_" Roisil
 # DESCRIPTION: Basic Airflow container
 # BUILD: docker build --rm -t puckel/docker-airflow .
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive
 ENV TERM linux
 
 # Airflow
-ARG AIRFLOW_VERSION=1.10.0
+ARG AIRFLOW_VERSION=1.10.1
 ARG AIRFLOW_HOME=/usr/local/airflow
 ARG AIRFLOW_DEPS=""
 ARG PYTHON_DEPS=""
@@ -28,7 +28,6 @@ ENV LC_MESSAGES en_US.UTF-8
 RUN set -ex \
     && buildDeps=' \
         freetds-dev \
-        python3-dev \
         libkrb5-dev \
         libsasl2-dev \
         libssl-dev \
@@ -42,8 +41,6 @@ RUN set -ex \
         $buildDeps \
         freetds-bin \
         build-essential \
-        python3-pip \
-        python3-requests \
         default-libmysqlclient-dev \
         apt-utils \
         curl \
@@ -60,7 +57,7 @@ RUN set -ex \
    && pip install ndg-httpsclient \
    && pip install pyasn1 \
    && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \
-   && pip install 'celery[redis]>=4.1.1,<4.2.0' \
+   && pip install 'redis>=2.10.5,<3' \
    && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \
    && apt-get purge --auto-remove -yqq $buildDeps \
    && apt-get autoremove -yqq --purge \
diff --git a/config/airflow.cfg b/config/airflow.cfg
index 61018bb0..1ea3b371 100644
--- a/config/airflow.cfg
+++ b/config/airflow.cfg
@@ -39,6 +39,7 @@ simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
 # we need to escape the curly braces by adding an additional curly brace
 log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
 log_processor_filename_template = {{ filename }}.log
+dag_processor_manager_log_location = /usr/local/airflow/logs/dag_processor_manager/dag_processor_manager.log
 
 # Hostname by providing a path to a callable, which will resolve the hostname
 hostname_callable = socket:getfqdn
@@ -59,6 +60,9 @@ executor = SequentialExecutor
 # If SqlAlchemy should pool database connections.
 sql_alchemy_pool_enabled = True
 
+# The encoding for the databases
+sql_engine_encoding = utf-8
+
 # The SqlAlchemy pool size is the maximum number of database connections
 # in the pool. 0 indicates no limit.
 sql_alchemy_pool_size = 5
@@ -73,6 +77,10 @@ sql_alchemy_pool_recycle = 1800
 # disconnects. Setting this to 0 disables retries.
 sql_alchemy_reconnect_timeout = 300
 
+# The schema to use for the metadata database
+# SqlAlchemy supports databases with the concept of multiple schemas.
+sql_alchemy_schema =
+
 # The amount of parallelism as a setting to the executor. This defines
 # the max number of task instances that should run simultaneously
 # on this airflow installation
@@ -142,6 +150,9 @@ killed_task_cleanup_time = 60
 # `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
 dag_run_conf_overrides_params = False
 
+# Worker initialisation check to validate Metadata Database connection
+worker_precheck = False
+
 [cli]
 # In what way should the cli access the API. The LocalClient will use the
 # database directly, while the json_client will use the api running on the
@@ -180,6 +191,9 @@ default_gpus = 0
 [hive]
 # Default mapreduce queue for HiveOperator tasks
 default_hive_mapred_queue =
+# Template for mapred_job_name in HiveOperator, supports the following named parameters:
+# hostname, dag_id, task_id, execution_date
+mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date}
 
 [webserver]
 # The base url of your website as airflow cannot guess what domain or
@@ -227,7 +241,10 @@ access_logfile = -
 error_logfile = -
 
 # Expose the configuration file in the web server
-expose_config = False
+# This is only applicable for the flask-admin based web UI (non FAB-based).
+# In the FAB-based web UI with RBAC feature,
+# access to configuration is controlled by role permissions.
+expose_config = True
 
 # Set to true to turn on authentication:
 # https://airflow.incubator.apache.org/security.html#web-authentication
@@ -387,9 +404,7 @@ run_duration = -1
 # after how much time a new DAGs should be picked up from the filesystem
 min_file_process_interval = 0
 
-# How many seconds to wait between file-parsing loops to prevent the logs from being spammed.
-min_file_parsing_loop_time = 1
-
+# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
 dag_dir_list_interval = 300
 
 # How often should stats be printed to the logs
@@ -427,6 +442,10 @@ max_threads = 2
 
 authenticate = False
 
+# Turn off scheduler use of cron intervals by setting this to False.
+# DAGs submitted manually in the web UI or with trigger_dag will still run.
+use_job_schedule = True
+
 [ldap]
 # set this to ldaps://<your.ldap.server>:<port>
 uri =
@@ -491,7 +510,6 @@ reinit_frequency = 3600
 kinit_path = kinit
 keytab = airflow.keytab
 
-
 [github_enterprise]
 api_rev = v3
 
@@ -506,9 +524,11 @@ elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
 elasticsearch_end_of_log_mark = end_of_log
 
 [kubernetes]
-# The repository and tag of the Kubernetes Image for the Worker to Run
+# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
 worker_container_repository =
 worker_container_tag =
+worker_container_image_pull_policy = IfNotPresent
+worker_dags_folder =
 
 # If True (default), worker pods will be deleted upon termination
 delete_worker_pods = True
@@ -562,6 +582,11 @@ gcp_service_account_keys =
 # It will raise an exception if called from a process not running in a kubernetes environment.
 in_cluster = True
 
+[kubernetes_node_selectors]
+# The Key-value pairs to be given to worker pods.
+# The worker pods will be scheduled to the nodes of the specified key-value pairs.
+# Should be supplied in the format: key = value
+
 [kubernetes_secrets]
 # The scheduler mounts the following secrets into your workers as they are launched by the
 # scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml
index 95b32c1d..3986eaa7 100644
--- a/docker-compose-CeleryExecutor.yml
+++ b/docker-compose-CeleryExecutor.yml
@@ -16,7 +16,7 @@ services:
         # - ./pgdata:/var/lib/postgresql/data/pgdata
 
     webserver:
-        image: puckel/docker-airflow:1.10.0-5
+        image: puckel/docker-airflow:1.10.1
         restart: always
         depends_on:
             - postgres
@@ -43,7 +43,7 @@ services:
             retries: 3
 
     flower:
-        image: puckel/docker-airflow:1.10.0-5
+        image: puckel/docker-airflow:1.10.1
         restart: always
        depends_on:
            - redis
@@ -55,7 +55,7 @@ services:
         command: flower
 
     scheduler:
-        image: puckel/docker-airflow:1.10.0-5
+        image: puckel/docker-airflow:1.10.1
         restart: always
         depends_on:
             - webserver
@@ -74,7 +74,7 @@ services:
         command: scheduler
 
     worker:
-        image: puckel/docker-airflow:1.10.0-5
+        image: puckel/docker-airflow:1.10.1
         restart: always
         depends_on:
             - scheduler
diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml
index dea68094..2a5c393a 100644
--- a/docker-compose-LocalExecutor.yml
+++ b/docker-compose-LocalExecutor.yml
@@ -8,7 +8,7 @@ services:
             - POSTGRES_DB=airflow
 
     webserver:
-        image: puckel/docker-airflow:1.10.0-5
+        image: puckel/docker-airflow:1.10.1
         restart: always
         depends_on:
             - postgres
diff --git a/script/entrypoint.sh b/script/entrypoint.sh
index 90859270..fb3f9ad1 100755
--- a/script/entrypoint.sh
+++ b/script/entrypoint.sh
@@ -82,6 +82,7 @@ case "$1" in
     exec airflow "$@"
     ;;
   flower)
+    sleep 10
     exec airflow "$@"
     ;;
   version)
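
A minimal local smoke test for this bump (a sketch, not part of the patch), assuming the repository root as the working directory: the build command is the one documented in the Dockerfile's BUILD header, with the 1.10.1 tag added so the compose files use the local build instead of pulling from Docker Hub.

    # Rebuild the image locally, tagged to match the compose files in this change
    docker build --rm -t puckel/docker-airflow:1.10.1 .

    # Bring up the single-node LocalExecutor stack (Postgres + webserver) on the rebuilt image
    docker-compose -f docker-compose-LocalExecutor.yml up -d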