Requirements now depend on python version (apache#7841)
potiuk authored Mar 27, 2020
1 parent 3cc631e commit 3fb5f15
Showing 40 changed files with 628 additions and 218 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
@@ -48,7 +48,7 @@
!MANIFEST.in
!NOTICE
!.github
- !requirements.txt
+ !requirements

# Avoid triggering context change on README change (new companies using Airflow)
# So please do not uncomment this line ;)
9 changes: 1 addition & 8 deletions .pre-commit-config.yaml
@@ -289,17 +289,10 @@ repos:
pass_filenames: true
- id: build
name: Check if image build is needed
- entry: ./scripts/ci/pre_commit_ci_build.sh
+ entry: ./scripts/ci/pre_commit_ci_build.sh 3.6 false
language: system
always_run: true
pass_filenames: false
- - id: generate-requirements
-   name: Generate requirements
-   entry: "./scripts/ci/pre_commit_generate_requirements.sh"
-   language: system
-   files: ^setup.py$
-   pass_filenames: false
-   require_serial: true
- id: check-apache-license
name: Check if licenses are OK for Apache
entry: "./scripts/ci/pre_commit_check_license.sh"
1 change: 1 addition & 0 deletions .rat-excludes
@@ -13,6 +13,7 @@
.eslintignore
.flake8
.rat-excludes
+ requirements
requirements.txt
.*log
.travis.yml
38 changes: 21 additions & 17 deletions .travis.yml
@@ -41,24 +41,24 @@ jobs:
stage: pre-test
script: ./scripts/ci/ci_run_all_static_checks.sh
env: >-
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
AIRFLOW_MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS="true"
- SKIP=pylint-tests,generate-requirements
+ SKIP=pylint-tests
- name: "Static checks - pylint tests only"
stage: pre-test
script: ./scripts/ci/ci_run_static_checks_pylint_tests.sh
env: >-
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
AIRFLOW_MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS="true"
- name: "Build documentation"
env: >-
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
stage: test
script: ./scripts/ci/ci_docs.sh
- name: "Tests [Py3.6][Kubernetes][persistent]"
env: >-
BACKEND=postgres
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
RUNTIME=kubernetes
ENABLE_KIND_CLUSTER=true
KUBERNETES_MODE=persistent_mode
@@ -68,7 +68,7 @@ jobs:
- name: "Tests [Py3.7][Kubernetes][git]"
env: >-
BACKEND=postgres
- PYTHON_VERSION=3.7
+ PYTHON_MAJOR_MINOR_VERSION=3.7
RUNTIME=kubernetes
ENABLE_KIND_CLUSTER=true
KUBERNETES_MODE=git_mode
@@ -78,7 +78,7 @@ jobs:
- name: "Tests [Postgres9.6][Py3.6][integrations]"
env: >-
BACKEND=postgres
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
POSTGRES_VERSION=9.6
ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
RUN_INTEGRATION_TESTS=all
@@ -87,56 +87,60 @@
env: >-
BACKEND=postgres
POSTGRES_VERSION=10
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
script: "./scripts/ci/ci_run_airflow_testing.sh tests/providers"
stage: test
- name: "Tests [Postgres9.6][Py3.6][core]"
env: >-
BACKEND=postgres
POSTGRES_VERSION=9.6
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
script: "./scripts/ci/ci_run_airflow_testing.sh --ignore=tests/providers"
stage: test
- name: "Tests [Sqlite][Py3.7][integrations]"
env: >-
BACKEND=sqlite
- PYTHON_VERSION=3.7
+ PYTHON_MAJOR_MINOR_VERSION=3.7
ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
RUN_INTEGRATION_TESTS=all
stage: test
- name: "Tests [Sqlite][Py3.6]"
env: >-
BACKEND=sqlite
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
stage: test
- name: "Tests [MySQL5.7][Py3.6][integrations]"
env: >-
BACKEND=mysql
- PYTHON_VERSION=3.6
+ PYTHON_MAJOR_MINOR_VERSION=3.6
MYSQL_VERSION=5.7
ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
RUN_INTEGRATION_TESTS=all
stage: test
- name: "Tests [MySQL5.7][Py3.7][providers][kerberos]"
env: >-
BACKEND=mysql
- PYTHON_VERSION=3.7
+ PYTHON_MAJOR_MINOR_VERSION=3.7
MYSQL_VERSION=5.7
ENABLED_INTEGRATIONS="kerberos"
script: "./scripts/ci/ci_run_airflow_testing.sh tests/providers"
stage: test
- name: "Tests [MySQL5.7][Py3.7][core][kerberos]"
env: >-
BACKEND=mysql
- PYTHON_VERSION=3.7
+ PYTHON_MAJOR_MINOR_VERSION=3.7
MYSQL_VERSION=5.7
ENABLED_INTEGRATIONS="kerberos"
script: "./scripts/ci/ci_run_airflow_testing.sh --ignore=tests/providers"
stage: test
- name: "Generate requirements"
- name: "Generate requirements Py3.6"
env: >-
PYTHON_VERSION=3.6
INSTALL_AIRFLOW_VERSION="1.10.9"
PYTHON_MAJOR_MINOR_VERSION=3.6
stage: test
script: ./scripts/ci/ci_generate_requirements.sh
- name: "Generate requirements Py3.7"
env: >-
PYTHON_MAJOR_MINOR_VERSION=3.7
stage: test
script: ./scripts/ci/ci_generate_requirements.sh
- name: "Prepare & test backport packages 1.10.9"
15 changes: 12 additions & 3 deletions BREEZE.rst
@@ -116,7 +116,7 @@ Docker Images Used by Breeze
For all development tasks, unit tests, integration tests and static code checks, we use the
**CI image** maintained on the Docker Hub in the ``apache/airflow`` repository.
This Docker image contains a lot of test-related packages (size of ~1GB).
- Its tag follows the pattern of ``<BRANCH>-python<PYTHON_VERSION>-ci``
+ Its tag follows the pattern of ``<BRANCH>-python<PYTHON_MAJOR_MINOR_VERSION>-ci``
(for example, ``apache/airflow:master-python3.6-ci``). The image is built using the
`<Dockerfile.ci>`_ Dockerfile.

@@ -638,6 +638,7 @@ This is the current syntax for `./breeze <./breeze>`_:
build-only Only builds docker images without entering container
cleanup-images Cleans up the container images created
exec Execs into running breeze container in new terminal
+ generate-requirements Generates pinned requirements for pip dependencies
initialize-local-virtualenv Initializes local virtualenv
setup-autocomplete Sets up autocomplete for breeze
stop Stops the docker-compose environment
@@ -709,10 +710,18 @@ This is the current syntax for `./breeze <./breeze>`_:
way to run multiple processes in the same container at the same time for example scheduler,
webserver, workers, database console and interactive terminal.
****************************************************************************************************
+ breeze [FLAGS] generate-requirements -- <EXTRA_ARGS>
+ Generates pinned requirements from setup.py. Those requirements are generated in the requirements
+ directory, separately for each python version. They are used to run
+ CI builds as well as repeatable production image builds. You can use those requirements
+ to predictably install released airflow versions. You should always run it after you update
+ setup.py.
****************************************************************************************************
breeze [FLAGS] initialize-local-virtualenv -- <EXTRA_ARGS>
Initializes locally created virtualenv installing all dependencies of Airflow
- taking into account the frozen requirements from requirements.txt.
+ taking into account the frozen requirements from the requirements folder.
This local virtualenv can be used to aid autocompletion and IDE support as
well as run unit tests directly from the IDE. You need to have virtualenv
activated before running this command.
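A minimal sketch of the local-virtualenv flow described above; the virtualenv location and tool are arbitrary choices, while the activation requirement comes from the help text:

.. code-block:: bash

    # Create and activate a virtualenv first (path is hypothetical)
    python3.6 -m venv ~/airflow-venv
    source ~/airflow-venv/bin/activate

    # Then let breeze install Airflow's pinned dependencies into it
    ./breeze initialize-local-virtualenv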
@@ -835,7 +844,7 @@ This is the current syntax for `./breeze <./breeze>`_:
Choose Airflow variant
****************************************************************************************************
- -p, --python <PYTHON_VERSION>
+ -p, --python <PYTHON_MAJOR_MINOR_VERSION>
Python version used for the image. This is always major/minor version.
One of:
91 changes: 68 additions & 23 deletions CONTRIBUTING.rst
@@ -325,8 +325,8 @@ statsd, tableau, vertica, webhdfs, winrm, yandexcloud
.. END EXTRAS HERE
- Pinned Airflow requirements.txt file
- ------------------------------------
+ Airflow dependencies
+ --------------------

Airflow is not a standard python project. Most of the python projects fall into one of two types -
application or library. As described in
@@ -341,31 +341,76 @@ be open to allow several different libraries with the same requirements to be in
The problem is that Apache Airflow is a bit of both - application to install and library to be used when
you are developing your own operators and DAGs.

- This - seemingly unsolvable - puzzle is solved as follows:
+ This - seemingly unsolvable - puzzle is solved by having pinned requirement files. Those are available
+ as of airflow 1.10.10.

- * by default when you install ``apache-airflow`` package - the dependencies are as open as possible while
-   still allowing the apache-airflow to install. This means that 'apache-airflow' package might fail to
-   install in case a direct or transitive dependency is released that breaks the installation. In such case
-   when installing ``apache-airflow``, you might need to provide additional constraints (for
-   example ``pip install apache-airflow==1.10.2 Werkzeug<1.0.0``)

+ Pinned requirement files
+ ------------------------

- * we have ``requirements.txt`` file generated automatically based on the set of all latest working
-   and tested requirement versions. You can also use that file as a constraints file when installing
-   apache airflow - either from the sources ``pip install -e . --constraint requirements.txt`` or
-   from the pypi package ``pip install apache-airflow --constraint requirements.txt``. Note that
-   this will also work with extras for example ``pip install .[gcp] --constraint requirements.txt`` or
-   ``pip install apache-airflow[gcp] --constraint requirements.txt``

+ By default, when you install the ``apache-airflow`` package, the dependencies are as open as possible while
+ still allowing the apache-airflow package to install. This means that the 'apache-airflow' package might fail
+ to install when a direct or transitive dependency is released that breaks the installation. In such a case,
+ when installing ``apache-airflow``, you might need to provide additional constraints (for
+ example ``pip install apache-airflow==1.10.2 Werkzeug<1.0.0``)
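When typing that example into an actual shell, the version bound needs quoting so that ``<`` is not interpreted as an output redirect; a sketch of the same recovery step:

.. code-block:: bash

    # Constrain the known-bad dependency while installing
    pip install apache-airflow==1.10.2 "Werkzeug<1.0.0"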

- The ``requirements.txt`` file should be updated automatically via pre-commit whenever you update dependencies.
- It reflects the current set of dependencies installed in the CI image of Apache Airflow.
- The same set of requirements will be used to produce the production image.

+ However, we now have ``requirements-python<PYTHON_MAJOR_MINOR_VERSION>.txt`` files generated
+ automatically and committed in the requirements folder, based on the set of all latest working and tested
+ requirement versions. Those ``requirements-python<PYTHON_MAJOR_MINOR_VERSION>.txt`` files can be used as
+ constraint files when installing Apache Airflow - either from the sources:

- If you do not use pre-commits and the CI build fails / you need to regenerate it, you can do it manually:
- ``pre-commit run generate-requirements --all-files`` or via the script
- ``./scripts/ci/ci_generate_requirements.sh``.
- This will try to regenerate the requirements.txt file with the latest requirements matching
- the setup.py constraints.

+ .. code-block:: bash
+
+     pip install -e . --constraint requirements/requirements-python3.6.txt
+ or from the PyPI package:
+
+ .. code-block:: bash
+
+     pip install apache-airflow --constraint requirements/requirements-python3.6.txt
+ This also works with extras - for example:
+
+ .. code-block:: bash
+
+     pip install .[gcp] --constraint requirements/requirements-python3.6.txt
+ It is also possible to use the constraints directly from GitHub, using a tag/version name:
+
+ .. code-block:: bash
+
+     pip install apache-airflow[gcp]==1.10.10 \
+         --constraint https://raw.githubusercontent.com/apache/airflow/1.10.10/requirements/requirements-python3.6.txt
+ There are different sets of fixed requirements for different python major/minor versions, and you should
+ use the right requirements file for the right python version.
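A small sketch of selecting the matching constraints file automatically from the interpreter that will run Airflow (the ``PY_VER`` variable name is illustrative):

.. code-block:: bash

    # Derive major.minor of the active interpreter, then install with its constraints
    PY_VER=$(python -c 'import sys; print("%d.%d" % sys.version_info[:2])')
    pip install apache-airflow --constraint "requirements/requirements-python${PY_VER}.txt"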

+ The ``requirements-python<PYTHON_MAJOR_MINOR_VERSION>.txt`` files MUST be regenerated every time
+ ``setup.py`` is updated. This is checked automatically in the Travis CI build. There are separate
+ jobs for each python version that check whether the requirements should be updated.

+ If they are not updated, you should regenerate the requirements locally using Breeze, as described below.

+ Generating requirement files
+ ----------------------------
+
+ This should be done every time you modify the setup.py file. You can generate the requirement files
+ using `Breeze <BREEZE.rst>`_. Simply use these commands:

+ .. code-block:: bash
+
+     breeze generate-requirements --python 3.7
+
+ .. code-block:: bash
+
+     breeze generate-requirements --python 3.6
+ Note that when you generate requirements this way, you might pick up the latest versions of requirements
+ released since the last run, so during tests you might get errors unrelated to your change.
+ In this case the easiest way to fix it is to limit the culprit dependency to the previous version
+ with a ``<NNNN.NN>`` constraint added in setup.py.
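A hedged sketch of that fix, reusing the Werkzeug case from earlier as a stand-in culprit (the exact ``setup.py`` line will differ):

.. code-block:: bash

    # 1. In setup.py, bound the culprit to the last known-good release, e.g. change
    #    'werkzeug>=0.14'  to  'werkzeug>=0.14, <1.0.0'  (illustrative versions)
    # 2. Regenerate the pinned files for every supported interpreter
    breeze generate-requirements --python 3.6
    breeze generate-requirements --python 3.7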

Backport providers packages
---------------------------
@@ -975,4 +1020,4 @@ prepare such packages on your own easily.
``python setup.py <PROVIDER_NAME> sdist`` but this is only needed in case of distribution of the packages.

Note that those are unofficial packages yet - they are not yet released in PyPI, but you might use them to
- test the master versions of operators/hooks/sensors in a 1.10.* environment of airflow with Python3.6+
+ test the master versions of operators/hooks/sensors in an Airflow 1.10.* environment with Python3.6+
27 changes: 18 additions & 9 deletions Dockerfile.ci
@@ -26,6 +26,12 @@ ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE}
ARG AIRFLOW_VERSION="2.0.0.dev0"
ENV AIRFLOW_VERSION=$AIRFLOW_VERSION

ARG PYTHON_MAJOR_MINOR_VERSION="3.6"
ENV PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION}

ARG UPGRADE_TO_LATEST_REQUIREMENTS="false"
ENV UPGRADE_TO_LATEST_REQUIREMENTS=${UPGRADE_TO_LATEST_REQUIREMENTS}

# Print versions
RUN echo "Base image: ${PYTHON_BASE_IMAGE}"
RUN echo "Airflow version: ${AIRFLOW_VERSION}"
@@ -128,12 +134,14 @@ RUN mkdir -pv /usr/share/man/man1 \
krb5-user \
ldap-utils \
less \
+ # The latest buster images do not have libpython 2.7 installed and it is needed
+ # to run virtualenv tests with python 2
+ libpython2.7-stdlib \
lsb-release \
net-tools \
openssh-client \
openssh-server \
postgresql-client \
- python-selinux \
sqlite3 \
tmux \
unzip \
@@ -257,7 +265,7 @@ RUN echo "Pip version: ${PIP_VERSION}"
RUN pip install --upgrade pip==${PIP_VERSION}

# Install Google SDK
ENV GCLOUD_HOME="/opt/gcloud"
ENV GCLOUD_HOME="/opt/gcloud" CLOUDSDK_PYTHON=python${PYTHON_MAJOR_MINOR_VERSION}

RUN GCLOUD_VERSION="274.0.1" \
&& GCOUD_URL="https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz" \
@@ -326,7 +334,7 @@ ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}

RUN echo "Installing with extras: ${AIRFLOW_EXTRAS}."

ARG AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="false"
ARG AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="true"
ENV AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD=${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}

# By changing the CI build epoch we can force reinstalling Airflow from the current master
@@ -341,7 +349,7 @@ ENV AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH}
RUN \
if [[ "${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" == "true" ]]; then \
pip install \
"https://github.com/apache/airflow/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
&& pip uninstall --yes apache-airflow; \
fi

@@ -367,19 +375,20 @@ COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
COPY airflow/version.py ${AIRFLOW_SOURCES}/airflow/version.py
COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py

- COPY requirements.txt ${AIRFLOW_SOURCES}/requirements.txt
+ COPY requirements/requirements-python${PYTHON_MAJOR_MINOR_VERSION}.txt \
+     ${AIRFLOW_SOURCES}/requirements/requirements-python${PYTHON_MAJOR_MINOR_VERSION}.txt

- ENV UPGRADE_TO_LATEST_REQUIREMENTS_IN_DOCKER_BUILD=${UPGRADE_TO_LATEST_REQUIREMENTS_IN_DOCKER_BUILD}
# The goal of this line is to install the dependencies from the most current setup.py from sources
# This will be usually incremental small set of packages in CI optimized build, so it will be very fast
# In non-CI optimized build this will install all dependencies before installing sources.
- # Usually we will install versions constrained to the current requirements.txt
+ # Usually we will install versions constrained to the current requirements file
# But in cron job we will install latest versions matching setup.py to see if there is no breaking change
RUN \
if [[ "${UPGRADE_TO_LATEST_REQUIREMENTS_IN_DOCKER_BUILD}" == "true" ]]; then \
if [[ "${UPGRADE_TO_LATEST_REQUIREMENTS}" == "true" ]]; then \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade; \
else \
pip install -e ".[${AIRFLOW_EXTRAS}]" --constraint ${AIRFLOW_SOURCES}/requirements.txt ; \
pip install -e ".[${AIRFLOW_EXTRAS}]" \
--constraint ${AIRFLOW_SOURCES}/requirements/requirements-python${PYTHON_MAJOR_MINOR_VERSION}.txt ; \
fi
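Outside the image build, the two branches of that ``RUN`` correspond roughly to the following pip invocations (a sketch; the ``devel`` extra stands in for ``${AIRFLOW_EXTRAS}``):

.. code-block:: bash

    # Cron mode: newest versions that still satisfy setup.py
    pip install -e ".[devel]" --upgrade

    # Default mode: repeatable install pinned by the generated constraints
    pip install -e ".[devel]" \
        --constraint requirements/requirements-python3.6.txt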

# Copy all the www/ files we need to compile assets. Done as two separate COPY