Merge pull request #143 from Fokko/update-to-airflow-19
Update to Apache Airflow 1.9
puckel authored Jan 3, 2018
2 parents f4cf78c + 27b83eb commit 017f07b
Showing 3 changed files with 72 additions and 12 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -21,4 +21,6 @@ Session.vim

# sftp configuration file
sftp-config.json
*.pyc

# Python
__pycache__
2 changes: 1 addition & 1 deletion Dockerfile
@@ -57,7 +57,7 @@ RUN set -ex \
&& pip install ndg-httpsclient \
&& pip install pyasn1 \
&& pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \
&& pip install celery[redis] \
&& pip install celery[redis]==4.0.2 \
&& apt-get purge --auto-remove -yqq $buildDeps \
&& apt-get clean \
&& rm -rf \
78 changes: 68 additions & 10 deletions config/airflow.cfg
@@ -12,18 +12,26 @@ dags_folder = /usr/local/airflow/dags
base_log_folder = /usr/local/airflow/logs

# Airflow can store logs remotely in AWS S3 or Google Cloud Storage. Users
# must supply a remote location URL (starting with either 's3://...' or
# 'gs://...') and an Airflow connection id that provides access to the storage
# must supply an Airflow connection id that provides access to the storage
# location.
remote_base_log_folder =
remote_log_conn_id =
# Use server-side encryption for logs stored in S3
encrypt_s3_logs = False
# DEPRECATED option for remote log storage, use remote_base_log_folder instead!
s3_log_folder =
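
Note: these options only take effect once remote_base_log_folder is set and
the referenced connection exists in the metadata database. A minimal sketch,
assuming a hypothetical bucket "my-airflow-logs" and connection id "s3_logs"
(neither appears in this commit):

    # register_log_conn.py - hypothetical one-off helper; pairs with
    #   remote_base_log_folder = s3://my-airflow-logs
    #   remote_log_conn_id = s3_logs
    from airflow import settings
    from airflow.models import Connection

    conn = Connection(
        conn_id='s3_logs',  # must match remote_log_conn_id
        conn_type='s3',
        extra='{"aws_access_key_id": "...", "aws_secret_access_key": "..."}',
    )
    session = settings.Session()
    session.add(conn)
    session.commit()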

# Logging level
logging_level = INFO

# Logging class
# Specify the class that will define the logging configuration
# This class has to be on the python classpath
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
logging_config_class =
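
Note: this option expects a dotted import path to a dict in the standard
logging.config.dictConfig format. A sketch, assuming a hypothetical module
my_logging_config somewhere on the PYTHONPATH:

    # my_logging_config.py - the option above would then read
    #   logging_config_class = my_logging_config.LOGGING_CONFIG
    import copy

    from airflow.config_templates.airflow_local_settings import (
        DEFAULT_LOGGING_CONFIG,
    )

    # Start from the dict Airflow ships and override selectively.
    LOGGING_CONFIG = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
    LOGGING_CONFIG['loggers']['airflow.task']['level'] = 'DEBUG'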

# Log format
log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s

# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor
executor = CeleryExecutor

# The SqlAlchemy connection string to the metadata database.
@@ -89,6 +97,18 @@ security =
# values at runtime)
unit_test_mode = False

# Name of handler to read task instance logs.
# Default to use file task handler.
task_log_reader = file.task

# Whether to enable pickling for xcom (note that this is insecure and allows for
# RCE exploits). This will be deprecated in Airflow 2.0 (it will be forced to False).
enable_xcom_pickling = True
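
Note: pickling is what lets XCom carry arbitrary Python objects; with the
option forced to False (the planned 2.0 behaviour) only JSON-serializable
values survive. A sketch of a task callable relying on it (the callable is
hypothetical, not part of this commit):

    # A set is picklable but not JSON-serializable, so this push only
    # works while enable_xcom_pickling = True.
    def push_ids(**context):
        context['ti'].xcom_push(key='ids', value={1, 2, 3})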

# When a task is killed forcefully, this is the amount of time in seconds that
# it has to clean up after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60

[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
@@ -168,6 +188,10 @@ filter_by_owner = False
# in order to use the ldapgroup mode.
owner_mode = user

# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree

# Default DAG orientation. Valid values are:
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR
@@ -184,6 +208,9 @@ log_fetch_timeout_sec = 5
# DAGs by default
hide_paused_dags_by_default = False

# Consistent page size across all listing views in the UI
page_size = 100

[email]
email_backend = airflow.utils.email.send_email_smtp

@@ -198,7 +225,7 @@ smtp_ssl = False
# smtp_user = airflow
# smtp_password = airflow
smtp_port = 25
smtp_mail_from = airflow@airflow.com
smtp_mail_from = airflow@example.com
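
Note: the block above can be exercised end to end with Airflow's own mail
helper; a sketch, assuming the configured SMTP server is reachable and using
a placeholder recipient:

    # Uses the email_backend / smtp_* settings configured above.
    from airflow.utils.email import send_email

    send_email(
        to='someone@example.com',
        subject='airflow smtp test',
        html_content='<p>Sent via the [email] settings in airflow.cfg</p>',
    )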

[celery]
# This section only applies if you are using the CeleryExecutor in
@@ -238,6 +265,19 @@ flower_port = 5555
# Default queue that tasks get assigned to and that workers listen on.
default_queue = default

# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
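
Note: the import path must resolve to an object Celery can use as its config
source; the shipped default is a plain dict. A sketch, assuming a
hypothetical module my_celery_config on the PYTHONPATH:

    # my_celery_config.py - the option above would then point at
    #   celery_config_options = my_celery_config.CELERY_CONFIG
    import copy

    from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG

    # Start from the default referenced above and override selectively.
    CELERY_CONFIG = copy.deepcopy(DEFAULT_CELERY_CONFIG)
    CELERY_CONFIG['worker_prefetch_multiplier'] = 1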

# No SSL
celery_ssl_active = False

[dask]
# This section only applies if you are using the DaskExecutor in
# the [core] section above

# The IP address and port of the Dask cluster's scheduler.
cluster_address = 127.0.0.1:8786
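
Note: the DaskExecutor only connects to an already-running scheduler at this
address; nothing here starts one. A sketch using the distributed package
(assumption: installed separately, it is not added by this commit):

    # Starts an in-process scheduler and two workers for local testing.
    from distributed import LocalCluster

    cluster = LocalCluster(scheduler_port=8786, n_workers=2)
    print(cluster.scheduler_address)  # e.g. tcp://127.0.0.1:8786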

[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
@@ -276,19 +316,37 @@ scheduler_zombie_task_threshold = 300
# DAG definition (catchup)
catchup_by_default = True

# This changes the batch size of queries in the scheduling main loop.
# This depends on query length limits and how long you are willing to hold locks.
# 0 for no limit
max_tis_per_query = 0

# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow
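
Note: with statsd_on = True the scheduler emits plain StatsD metrics, roughly
equivalent to the client call below (statsd is the same library Airflow uses
internally):

    import statsd

    client = statsd.StatsClient(host='localhost', port=8125, prefix='airflow')
    client.incr('scheduler_heartbeat')  # one of the counters Airflow sends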

# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run. However airflow will never
# use more threads than the amount of cpu cores available.
# This defines how many threads will run.
max_threads = 2

authenticate = False

[ldap]
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

[mesos]
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
