diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index da76fcbeffdaf..42dc2da0acc5d 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -180,6 +180,7 @@ jobs: RUNS_ON: "${{ needs.build-info.outputs.runs-on }}" BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -258,6 +259,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" INCLUDE_NOT_READY_PROVIDERS: "true" + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cb19201ddb05..461be73498d82 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -282,6 +282,7 @@ jobs: # Force more parallelism for build even on public images PARALLELISM: 6 VERSION_SUFFIX_FOR_PYPI: "dev0" + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -1863,6 +1864,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -1898,6 +1900,58 @@ jobs: PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}} DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} + build-prod-images-pip: + strategy: + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} + timeout-minutes: 80 + name: ${{needs.build-info.outputs.build-job-description}} PROD image pip (main) ${{matrix.python-version}} + runs-on: ["ubuntu-22.04"] + needs: [build-info, build-ci-images] + env: + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + RUNS_ON: "${{needs.build-info.outputs.runs-on}}" + BACKEND: sqlite + VERSION_SUFFIX_FOR_PYPI: "dev0" + DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "false" + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - uses: actions/checkout@v4 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }} + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: Build PROD Image pip ${{ matrix.python-version }}:${{env.IMAGE_TAG}} + uses: ./.github/actions/build-prod-images + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + with: + build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} + python-version: ${{ matrix.python-version }} + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + 
DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} + PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}} + DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} + IMAGE_TAG: "pip-${{ github.event.pull_request.head.sha || github.sha }}" + build-prod-images-bullseye: strategy: matrix: @@ -1914,6 +1968,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -1970,6 +2025,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -2027,6 +2083,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -2078,6 +2135,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -2134,6 +2192,7 @@ jobs: BACKEND: sqlite VERSION_SUFFIX_FOR_PYPI: "dev0" DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + USE_UV: "true" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" @@ -2530,6 +2589,7 @@ jobs: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" # Force more parallelism for build even on small instances PARALLELISM: 6 + USE_UV: "true" if: > needs.build-info.outputs.in-workflow-build == 'true' && needs.build-info.outputs.canary-run != 'true' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a5a8efe06aef9..bdef4440abbcd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -452,7 +452,7 @@ repos: name: Update extras in documentation entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py language: python - files: ^setup\.py$|^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$ + files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$|^Dockerfile.* pass_filenames: false additional_dependencies: ['rich>=12.4.4', 'tomli'] - id: check-extras-order diff --git a/Dockerfile b/Dockerfile index 34d80b2e2cd8c..cc824b91b05b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,8 +23,9 @@ # airflow-build-image - there all airflow dependencies can be installed (and # built - for those dependencies that require # build essentials). Airflow is installed there with -# --user switch so that all the dependencies are -# installed to ${HOME}/.local +# ${HOME}/.local virtualenv which is also considered +# as the --user folder by Python when creating a venv with +# --system-site-packages # # main - this is the actual production image that is much # smaller because it does not contain all the build @@ -35,7 +36,7 @@ # much smaller. 
# # Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf-kubernetes,common-io,docker,elasticsearch,ftp,google,google-auth,graphviz,grpc,hashicorp,http,ldap,microsoft-azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf-kubernetes,common-io,docker,elasticsearch,ftp,google,google-auth,graphviz,grpc,hashicorp,http,ldap,microsoft-azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,uv,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" @@ -54,6 +55,16 @@ ARG AIRFLOW_USE_UV="false" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" +# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow +# from the host - so we are using Dockerfile and copy it to /Dockerfile in target image +# because this is the only file we know exists locally. This way you can build the image from PyPI with +# **just** the Dockerfile and no need for any other files from Airflow repository. +# However, in case of breeze/development use we use the latest sources and override those +# SOURCES_FROM/TO with "." and "/opt/airflow" respectively - so that sources of Airflow (and all providers) +# are used to build the PROD image used in tests. +ARG AIRFLOW_SOURCES_FROM="Dockerfile" +ARG AIRFLOW_SOURCES_TO="/Dockerfile" + # By default latest released version of airflow is installed (when empty) but this value can be overridden # and we can install version according to specification (For example ==2.0.2 or <3.0.0). ARG AIRFLOW_VERSION_SPECIFICATION="" @@ -419,10 +430,7 @@ COPY <<"EOF" /install_packaging_tools.sh common::get_colors common::get_packaging_tool -common::get_airflow_version_specification -common::override_pip_version_if_needed common::show_packaging_tool_version_and_location - common::install_packaging_tools EOF @@ -457,23 +465,23 @@ function install_airflow_dependencies_from_branch_tip() { set +x common::install_packaging_tools set -x + echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" # Uninstall airflow and providers to keep only the dependencies. In the future when # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this # flag and skip the remove step. - ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true + pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true set +x echo echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. 
Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" echo set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow || true + ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x } common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -513,10 +521,15 @@ function common::get_packaging_tool() { echo export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" - export RESOLUTION_HIGHEST_FLAG="--resolution highest" - export RESOLUTION_LOWEST_DIRECT_FLAG="--resolution lowest-direct" + if [[ -z ${VIRTUAL_ENV=} ]]; then + export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + else + export EXTRA_INSTALL_FLAGS="" + export EXTRA_UNINSTALL_FLAGS="" + fi + export UPGRADE_EAGERLY="--upgrade --resolution highest" + export UPGRADE_IF_NEEDED="--upgrade --resolution lowest-direct" else echo echo "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}" @@ -525,8 +538,8 @@ function common::get_packaging_tool() { export PACKAGING_TOOL_CMD="pip" export EXTRA_INSTALL_FLAGS="--root-user-action ignore" export EXTRA_UNINSTALL_FLAGS="--yes" - export RESOLUTION_HIGHEST_FLAG="--upgrade-strategy eager" - export RESOLUTION_LOWEST_DIRECT_FLAG="--upgrade --upgrade-strategy only-if-needed" + export UPGRADE_EAGERLY="--upgrade --upgrade-strategy eager" + export UPGRADE_IF_NEEDED="--upgrade --upgrade-strategy only-if-needed" fi } @@ -538,14 +551,6 @@ function common::get_airflow_version_specification() { fi } -function common::override_pip_version_if_needed() { - if [[ -n ${AIRFLOW_VERSION} ]]; then - if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION=24.0 - fi - fi -} - function common::get_constraints_location() { if [[ -f "${HOME}/constraints.txt" ]]; then # constraints are already downloaded, do not calculate/override again @@ -592,6 +597,15 @@ function common::show_packaging_tool_version_and_location() { } function common::install_packaging_tools() { + if [[ "${VIRTUAL_ENV=}" != "" ]]; then + echo + echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}" + echo + else + echo + echo "${COLOR_BLUE}Checking packaging tools for system Python installation: $(which python)${COLOR_RESET}" + echo + fi if [[ ! ${AIRFLOW_PIP_VERSION} =~ [0-9.]* ]]; then echo echo "${COLOR_BLUE}Installing pip version from spec ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" @@ -684,6 +698,7 @@ COPY <<"EOF" /install_from_docker_context_files.sh . 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + function install_airflow_and_providers_from_docker_context_files(){ if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} @@ -699,46 +714,36 @@ function install_airflow_and_providers_from_docker_context_files(){ exit 1 fi - # shellcheck disable=SC2206 - local packaging_flags=( - # Don't quote this -- if it is empty we don't want it to create an - # empty array element - --find-links="file:///docker-context-files" - ) + # This is needed to get package names for local context packages + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint ${HOME}/constraints.txt packaging - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_package - reinstalling_apache_airflow_package=$(ls \ - /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) - # Add extras when installing airflow - if [[ -n "${reinstalling_apache_airflow_package}" ]]; then - # When a provider depends on a dev version of Airflow, we need to - # specify `apache-airflow==$VER`, otherwise pip will look for it on - # pip, and fail to find it - - # This will work as long as the wheel file is correctly named, which it - # will be if it was build by wheel tooling - local ver - ver=$(basename "$reinstalling_apache_airflow_package" | cut -d "-" -f 2) - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" + if [[ -n ${AIRFLOW_EXTRAS=} ]]; then + AIRFLOW_EXTRAS_TO_INSTALL="[${AIRFLOW_EXTRAS}]" + else + AIRFLOW_EXTRAS_TO_INSTALL="" fi - if [[ -z "${reinstalling_apache_airflow_package}" && ${AIRFLOW_VERSION=} != "" ]]; then + # Find Apache Airflow package in docker-context files + readarray -t install_airflow_package < <(EXTRAS="${AIRFLOW_EXTRAS_TO_INSTALL}" \ + python /scripts/docker/get_package_specs.py /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) + echo + echo "${COLOR_BLUE}Found airflow packages in docker-context-files folder: ${install_airflow_package[*]}${COLOR_RESET}" + echo + + if [[ -z "${install_airflow_package[*]}" && ${AIRFLOW_VERSION=} != "" ]]; then # When we install only provider packages from docker-context files, we need to still # install airflow from PyPI when AIRFLOW_VERSION is set. 
This handles the case where # pre-release dockerhub image of airflow is built, but we want to install some providers from # docker-context files - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" - fi - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_providers_packages - reinstalling_apache_airflow_providers_packages=$(ls \ - /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) - if [[ -z "${reinstalling_apache_airflow_package}" && \ - -z "${reinstalling_apache_airflow_providers_packages}" ]]; then - return + install_airflow_package=("apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}") fi + # Find Provider packages in docker-context files + readarray -t installing_providers_packages< <(python /scripts/docker/get_package_specs.py /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) + echo + echo "${COLOR_BLUE}Found provider packages in docker-context-files folder: ${installing_providers_packages[*]}${COLOR_RESET}" + echo + if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then local python_version python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') @@ -750,19 +755,19 @@ function install_airflow_and_providers_from_docker_context_files(){ echo # force reinstall all airflow + provider packages with constraints found in set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "${packaging_flags[@]}" --upgrade \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade \ ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + "${install_airflow_package[@]}" "${installing_providers_packages[@]}" set +x else echo echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "${packaging_flags[@]}" \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ --constraint "${HOME}/constraints.txt" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + "${install_airflow_package[@]}" "${installing_providers_packages[@]}" set +x fi else @@ -770,9 +775,9 @@ function install_airflow_and_providers_from_docker_context_files(){ echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "${packaging_flags[@]}" \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + "${install_airflow_package[@]}" "${installing_providers_packages[@]}" set +x fi common::install_packaging_tools @@ -799,7 +804,6 @@ function install_all_other_packages_from_docker_context_files() { common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -808,6 +812,41 @@ install_airflow_and_providers_from_docker_context_files install_all_other_packages_from_docker_context_files EOF +# The content below is automatically copied from scripts/docker/get_package_specs.py +COPY <<"EOF" /get_package_specs.py +#!/usr/bin/env python +from __future__ import annotations 
+ +import os +import sys +from pathlib import Path + +from packaging.utils import ( + InvalidSdistFilename, + InvalidWheelFilename, + parse_sdist_filename, + parse_wheel_filename, +) + + +def print_package_specs(extras: str = "") -> None: + for package_path in sys.argv[1:]: + try: + package, _, _, _ = parse_wheel_filename(Path(package_path).name) + except InvalidWheelFilename: + try: + package, _ = parse_sdist_filename(Path(package_path).name) + except InvalidSdistFilename: + print(f"Could not parse package name from {package_path}", file=sys.stderr) + continue + print(f"{package}{extras} @ file://{package_path}") + + +if __name__ == "__main__": + print_package_specs(extras=os.environ.get("EXTRAS", "")) +EOF + + # The content below is automatically copied from scripts/docker/install_airflow.sh COPY <<"EOF" /install_airflow.sh #!/usr/bin/env bash @@ -815,12 +854,21 @@ COPY <<"EOF" /install_airflow.sh . "$( dirname "${BASH_SOURCE[0]}" )/common.sh" function install_airflow() { - # Coherence check for editable installation mode. - if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then + # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method + local installation_command_flags + if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then + # When installing from sources - we always use `--editable` mode + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then + installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then + installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}] @ ${AIRFLOW_VERSION_SPECIFICATION/apache-airflow @//}" + else + echo + echo "${COLOR_RED}The '${AIRFLOW_INSTALLATION_METHOD}' installation method is not supported${COLOR_RESET}" + echo + echo "${COLOR_YELLOW}Supported methods are ('.', 'apache-airflow', 'apache-airflow @ URL')${COLOR_RESET}" echo - echo "${COLOR_RED}ERROR! You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" - echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" exit 1 fi # Remove mysql from extras if client is not going to be installed @@ -841,14 +889,10 @@ function install_airflow() { ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true set +x echo - echo "${COLOR_BLUE}Installing all packages with eager upgrade with ${AIRFLOW_INSTALL_EDITABLE_FLAG} mode${COLOR_RESET}" + echo "${COLOR_BLUE}Installing all packages in eager upgrade mode. 
Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_HIGHEST_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_EAGERLY} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} set +x common::install_packaging_tools echo @@ -857,14 +901,11 @@ function install_airflow() { pip check else echo - echo "${COLOR_BLUE}Installing all packages with constraints${COLOR_RESET}" + echo "${COLOR_BLUE}Installing all packages with constraints. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x # Install all packages with constraints - if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - --constraint "${HOME}/constraints.txt"; then + if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} --constraint "${HOME}/constraints.txt"; then set +x echo echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies conflicting with constraints.${COLOR_RESET}" @@ -872,10 +913,7 @@ function install_airflow() { echo "${COLOR_BLUE}Falling back to no-constraints, lowest-direct resolution installation.${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_LOWEST_DIRECT_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} fi set +x common::install_packaging_tools @@ -890,7 +928,6 @@ function install_airflow() { common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -913,7 +950,7 @@ function install_additional_dependencies() { echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_HIGHEST_FLAG} \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_EAGERLY} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} set +x @@ -927,7 +964,7 @@ function install_additional_dependencies() { echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade "${RESOLUTION_LOWEST_DIRECT_FLAG}" \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} set +x @@ -942,13 +979,53 @@ function install_additional_dependencies() { common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location install_additional_dependencies EOF +# The 
content below is automatically copied from scripts/docker/create_prod_venv.sh +COPY <<"EOF" /create_prod_venv.sh +#!/usr/bin/env bash +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +function create_prod_venv() { + echo + echo "${COLOR_BLUE}Removing ${HOME}/.local and re-creating it as virtual environment.${COLOR_RESET}" + rm -rf ~/.local + python -m venv ~/.local + echo "${COLOR_BLUE}The ${HOME}/.local virtualenv created.${COLOR_RESET}" +} + +common::get_colors +common::get_packaging_tool +common::show_packaging_tool_version_and_location +create_prod_venv +common::install_packaging_tools +EOF + + # The content below is automatically copied from scripts/docker/entrypoint_prod.sh COPY <<"EOF" /entrypoint_prod.sh @@ -1330,9 +1407,6 @@ ARG INSTALL_MYSQL_CLIENT="true" ARG INSTALL_MYSQL_CLIENT_TYPE="mariadb" ARG INSTALL_MSSQL_CLIENT="true" ARG INSTALL_POSTGRES_CLIENT="true" -ARG AIRFLOW_PIP_VERSION -ARG AIRFLOW_UV_VERSION -ARG AIRFLOW_USE_UV ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ @@ -1353,9 +1427,6 @@ ENV PATH=${PATH}:/opt/mssql-tools/bin # By default we do not install from docker context files but if we decide to install from docker context # files, we should override those variables to "docker-context-files" ARG DOCKER_CONTEXT_FILES="Dockerfile" - -COPY ${DOCKER_CONTEXT_FILES} /docker-context-files - ARG AIRFLOW_HOME ARG AIRFLOW_USER_HOME_DIR ARG AIRFLOW_UID @@ -1364,6 +1435,8 @@ RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-passw --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \ mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0" "${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME} +COPY --chown=${AIRFLOW_UID}:0 ${DOCKER_CONTEXT_FILES} /docker-context-files + USER airflow ARG AIRFLOW_REPO=apache/airflow @@ -1395,11 +1468,8 @@ ARG AIRFLOW_VERSION_SPECIFICATION ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" # By default we do not upgrade to latest dependencies ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" -# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow -# but in case of breeze/CI builds we use latest sources and we override those -# those SOURCES_FROM/TO with "." 
and "/opt/airflow" respectively -ARG AIRFLOW_SOURCES_FROM="Dockerfile" -ARG AIRFLOW_SOURCES_TO="/Dockerfile" +ARG AIRFLOW_SOURCES_FROM +ARG AIRFLOW_SOURCES_TO RUN if [[ -f /docker-context-files/pip.conf ]]; then \ @@ -1413,6 +1483,10 @@ RUN if [[ -f /docker-context-files/pip.conf ]]; then \ # Additional PIP flags passed to all pip install commands except reinstalling pip itself ARG ADDITIONAL_PIP_INSTALL_FLAGS="" +ARG AIRFLOW_PIP_VERSION +ARG AIRFLOW_UV_VERSION +ARG AIRFLOW_USE_UV + ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \ @@ -1430,21 +1504,19 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ - PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \ + PATH=${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH} \ PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ AIRFLOW_HOME=${AIRFLOW_HOME} \ AIRFLOW_UID=${AIRFLOW_UID} \ - AIRFLOW_INSTALL_EDITABLE_FLAG="" \ - UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \ - # By default PIP installs everything to ~/.local - PIP_USER="true" + UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} + # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here COPY --from=scripts common.sh install_packaging_tools.sh \ - install_airflow_dependencies_from_branch_tip.sh /scripts/docker/ + install_airflow_dependencies_from_branch_tip.sh create_prod_venv.sh /scripts/docker/ # We can set this value to true in case we want to install .whl/.tar.gz packages placed in the # docker-context-files folder. This can be done for both additional packages you want to install @@ -1463,13 +1535,19 @@ ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" ARG AIRFLOW_CI_BUILD_EPOCH="10" ENV AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} + # In case of Production build image segment we want to pre-install main version of airflow # dependencies from GitHub so that we do not have to always reinstall it from the scratch. # The Airflow and providers are uninstalled, only dependencies remain # the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) and in case # INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense). + +# By default PIP installs everything to ~/.local and it's also treated as VIRTUALENV +ENV VIRTUAL_ENV="${AIRFLOW_USER_HOME_DIR}/.local" + RUN bash /scripts/docker/install_packaging_tools.sh; \ + bash /scripts/docker/create_prod_venv.sh; \ if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \ @@ -1492,7 +1570,7 @@ ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ WORKDIR ${AIRFLOW_HOME} COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ - install_additional_dependencies.sh /scripts/docker/ + install_additional_dependencies.sh create_prod_venv.sh get_package_specs.py /scripts/docker/ # Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from # an incorrect architecture. 
@@ -1502,7 +1580,7 @@ ARG PIP_CACHE_EPOCH="9" # hadolint ignore=SC2086, SC2010, DL3042 RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ - if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ + if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ bash /scripts/docker/install_from_docker_context_files.sh; \ fi; \ if ! airflow version 2>/dev/null >/dev/null; then \ @@ -1514,8 +1592,8 @@ RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$P find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \ find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \ # make sure that all directories and files in .local are also group accessible - find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null chmod g+x; \ - find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw + find "${AIRFLOW_USER_HOME_DIR}/.local" -executable ! -type l -print0 | xargs --null chmod g+x; \ + find "${AIRFLOW_USER_HOME_DIR}/.local" ! -type l -print0 | xargs --null chmod g+rw # In case there is a requirements.txt file in "docker-context-files" it will be installed # during the build additionally to whatever has been installed so far. It is recommended that @@ -1523,7 +1601,7 @@ RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$P # hadolint ignore=DL3042 RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ if [[ -f /docker-context-files/requirements.txt ]]; then \ - pip install --user -r /docker-context-files/requirements.txt; \ + pip install -r /docker-context-files/requirements.txt; \ fi ############################################################################################## @@ -1545,9 +1623,6 @@ LABEL org.apache.airflow.distro="debian" \ org.apache.airflow.uid="${AIRFLOW_UID}" ARG PYTHON_BASE_IMAGE -ARG AIRFLOW_PIP_VERSION -ARG AIRFLOW_UV_VERSION -ARG AIRFLOW_USE_UV ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ # Make sure noninteractive debian install is used and language variables set @@ -1588,6 +1663,7 @@ ARG AIRFLOW_HOME # By default PIP installs everything to ~/.local ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}" \ + VIRTUAL_ENV="${AIRFLOW_USER_HOME_DIR}/.local" \ AIRFLOW_UID=${AIRFLOW_UID} \ AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ AIRFLOW_HOME=${AIRFLOW_HOME} @@ -1613,20 +1689,24 @@ RUN bash /scripts/docker/install_mysql.sh prod \ && mkdir -pv "${AIRFLOW_HOME}/logs" \ && chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ && chmod -R g+rw "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ - && find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x \ - && find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x + && find "${AIRFLOW_HOME}" -executable ! -type l -print0 | xargs --null chmod g+x \ + && find "${AIRFLOW_USER_HOME_DIR}" -executable ! 
-type l -print0 | xargs --null chmod g+x + +ARG AIRFLOW_SOURCES_FROM +ARG AIRFLOW_SOURCES_TO COPY --from=airflow-build-image --chown=airflow:0 \ "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local" +COPY --from=airflow-build-image --chown=airflow:0 \ + "${AIRFLOW_USER_HOME_DIR}/constraints.txt" "${AIRFLOW_USER_HOME_DIR}/constraints.txt" +# In case of editable build also copy airflow sources so that they are available in the main image +# For regular image (non-editable) this will be just Dockerfile copied to /Dockerfile +COPY --from=airflow-build-image --chown=airflow:0 "${AIRFLOW_SOURCES_TO}" "${AIRFLOW_SOURCES_TO}" + COPY --from=scripts entrypoint_prod.sh /entrypoint COPY --from=scripts clean-logs.sh /clean-logs COPY --from=scripts airflow-scheduler-autorestart.sh /airflow-scheduler-autorestart - -ARG AIRFLOW_PIP_VERSION -ARG AIRFLOW_UV_VERSION -ARG AIRFLOW_USE_UV - # Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift # See https://github.com/apache/airflow/issues/9248 # Set default groups for airflow and root user @@ -1638,12 +1718,13 @@ RUN chmod a+rx /entrypoint /clean-logs \ # make sure that the venv is activated for all users # including plain sudo, sudo with --interactive flag -RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"\/.venv\/bin:/" /etc/sudoers +RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"$(echo -n ${AIRFLOW_USER_HOME_DIR} | \ + sed 's/\//\\\//g')\/.local\/bin:/" /etc/sudoers ARG AIRFLOW_VERSION - -COPY --from=scripts install_packaging_tools.sh /scripts/docker/ -RUN bash /scripts/docker/install_packaging_tools.sh +ARG AIRFLOW_PIP_VERSION +ARG AIRFLOW_UV_VERSION +ARG AIRFLOW_USE_UV # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation # to learn more about the way how signals are handled by the image @@ -1652,7 +1733,6 @@ ENV DUMB_INIT_SETSID="1" \ PS1="(airflow)" \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW__CORE__LOAD_EXAMPLES="false" \ - PIP_USER="true" \ PATH="/root/bin:${PATH}" \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ @@ -1698,5 +1778,6 @@ LABEL org.apache.airflow.distro="debian" \ org.opencontainers.image.ref.name="airflow" \ org.opencontainers.image.title="Production Airflow Image" \ org.opencontainers.image.description="Reference, production-ready Apache Airflow image" + ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] CMD [] diff --git a/Dockerfile.ci b/Dockerfile.ci index 233dd86efd335..cc6e3786dbedb 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -377,10 +377,7 @@ COPY <<"EOF" /install_packaging_tools.sh common::get_colors common::get_packaging_tool -common::get_airflow_version_specification -common::override_pip_version_if_needed common::show_packaging_tool_version_and_location - common::install_packaging_tools EOF @@ -415,23 +412,23 @@ function install_airflow_dependencies_from_branch_tip() { set +x common::install_packaging_tools set -x + echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" # Uninstall airflow and providers to keep only the dependencies. In the future when # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this # flag and skip the remove step. 
- ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true + pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true set +x echo echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" echo set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow || true + ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x } common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -471,10 +468,15 @@ function common::get_packaging_tool() { echo export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" - export RESOLUTION_HIGHEST_FLAG="--resolution highest" - export RESOLUTION_LOWEST_DIRECT_FLAG="--resolution lowest-direct" + if [[ -z ${VIRTUAL_ENV=} ]]; then + export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + else + export EXTRA_INSTALL_FLAGS="" + export EXTRA_UNINSTALL_FLAGS="" + fi + export UPGRADE_EAGERLY="--upgrade --resolution highest" + export UPGRADE_IF_NEEDED="--upgrade --resolution lowest-direct" else echo echo "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}" @@ -483,8 +485,8 @@ function common::get_packaging_tool() { export PACKAGING_TOOL_CMD="pip" export EXTRA_INSTALL_FLAGS="--root-user-action ignore" export EXTRA_UNINSTALL_FLAGS="--yes" - export RESOLUTION_HIGHEST_FLAG="--upgrade-strategy eager" - export RESOLUTION_LOWEST_DIRECT_FLAG="--upgrade --upgrade-strategy only-if-needed" + export UPGRADE_EAGERLY="--upgrade --upgrade-strategy eager" + export UPGRADE_IF_NEEDED="--upgrade --upgrade-strategy only-if-needed" fi } @@ -496,14 +498,6 @@ function common::get_airflow_version_specification() { fi } -function common::override_pip_version_if_needed() { - if [[ -n ${AIRFLOW_VERSION} ]]; then - if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION=24.0 - fi - fi -} - function common::get_constraints_location() { if [[ -f "${HOME}/constraints.txt" ]]; then # constraints are already downloaded, do not calculate/override again @@ -550,6 +544,15 @@ function common::show_packaging_tool_version_and_location() { } function common::install_packaging_tools() { + if [[ "${VIRTUAL_ENV=}" != "" ]]; then + echo + echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}" + echo + else + echo + echo "${COLOR_BLUE}Checking packaging tools for system Python installation: $(which python)${COLOR_RESET}" + echo + fi if [[ ! ${AIRFLOW_PIP_VERSION} =~ [0-9.]* ]]; then echo echo "${COLOR_BLUE}Installing pip version from spec ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" @@ -655,12 +658,21 @@ COPY <<"EOF" /install_airflow.sh . "$( dirname "${BASH_SOURCE[0]}" )/common.sh" function install_airflow() { - # Coherence check for editable installation mode. - if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then + # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method + local installation_command_flags + if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." 
]]; then + # When installing from sources - we always use `--editable` mode + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then + installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then + installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}] @ ${AIRFLOW_VERSION_SPECIFICATION/apache-airflow @//}" + else + echo + echo "${COLOR_RED}The '${AIRFLOW_INSTALLATION_METHOD}' installation method is not supported${COLOR_RESET}" + echo + echo "${COLOR_YELLOW}Supported methods are ('.', 'apache-airflow', 'apache-airflow @ URL')${COLOR_RESET}" echo - echo "${COLOR_RED}ERROR! You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" - echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" exit 1 fi # Remove mysql from extras if client is not going to be installed @@ -681,14 +693,10 @@ function install_airflow() { ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true set +x echo - echo "${COLOR_BLUE}Installing all packages with eager upgrade with ${AIRFLOW_INSTALL_EDITABLE_FLAG} mode${COLOR_RESET}" + echo "${COLOR_BLUE}Installing all packages in eager upgrade mode. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_HIGHEST_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_EAGERLY} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} set +x common::install_packaging_tools echo @@ -697,14 +705,11 @@ function install_airflow() { pip check else echo - echo "${COLOR_BLUE}Installing all packages with constraints${COLOR_RESET}" + echo "${COLOR_BLUE}Installing all packages with constraints. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x # Install all packages with constraints - if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - --constraint "${HOME}/constraints.txt"; then + if ! 
${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} --constraint "${HOME}/constraints.txt"; then set +x echo echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies conflicting with constraints.${COLOR_RESET}" @@ -712,10 +717,7 @@ function install_airflow() { echo "${COLOR_BLUE}Falling back to no-constraints, lowest-direct resolution installation.${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_LOWEST_DIRECT_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} fi set +x common::install_packaging_tools @@ -730,7 +732,6 @@ function install_airflow() { common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -753,7 +754,7 @@ function install_additional_dependencies() { echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade ${RESOLUTION_HIGHEST_FLAG} \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_EAGERLY} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} set +x @@ -767,7 +768,7 @@ function install_additional_dependencies() { echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" echo set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade "${RESOLUTION_LOWEST_DIRECT_FLAG}" \ + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} set +x @@ -782,7 +783,6 @@ function install_additional_dependencies() { common::get_colors common::get_packaging_tool common::get_airflow_version_specification -common::override_pip_version_if_needed common::get_constraints_location common::show_packaging_tool_version_and_location @@ -1243,18 +1243,11 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \ -# In the CI image we always: -# * install MySQL, MsSQL -# * install airflow from current sources, not from PyPI package -# * install airflow without `--user` flag -# * install airflow in editable mode -# * install always current version of airflow INSTALL_MYSQL_CLIENT="true" \ INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ INSTALL_MSSQL_CLIENT="true" \ INSTALL_POSTGRES_CLIENT="true" \ AIRFLOW_INSTALLATION_METHOD="." 
\ - AIRFLOW_INSTALL_EDITABLE_FLAG="--editable" \ AIRFLOW_VERSION_SPECIFICATION="" \ PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \ PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ diff --git a/INSTALL b/INSTALL index 0f1ba985901fc..a1e2034b69937 100644 --- a/INSTALL +++ b/INSTALL @@ -255,7 +255,7 @@ microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, od openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, vertica, virtualenv, +spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, uv, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk # END REGULAR EXTRAS HERE diff --git a/contributing-docs/12_airflow_dependencies_and_extras.rst b/contributing-docs/12_airflow_dependencies_and_extras.rst index 6dcb92575a37a..bf16efd91de12 100644 --- a/contributing-docs/12_airflow_dependencies_and_extras.rst +++ b/contributing-docs/12_airflow_dependencies_and_extras.rst @@ -211,7 +211,7 @@ microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, od openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, vertica, virtualenv, +spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, uv, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk .. END REGULAR EXTRAS HERE diff --git a/contributing-docs/testing/k8s_tests.rst b/contributing-docs/testing/k8s_tests.rst index 8a7383f90c2be..4c00ec42a6893 100644 --- a/contributing-docs/testing/k8s_tests.rst +++ b/contributing-docs/testing/k8s_tests.rst @@ -358,7 +358,14 @@ Should show the status of current KinD cluster. Cluster healthy: airflow-python-3.8-v1.24.2 5. Build the image based on the PROD Airflow image. You need to build the PROD image first (the command will - guide you if you did not - either by running the build separately or passing ``--rebuild-base-image`` flag + guide you if you did not) either by running the build separately or passing the ``--rebuild-base-image`` + flag. Generally speaking you should not need to rebuild the base image unless you changed some + dependencies in ``pyproject.toml``. + + Note that this command by default uses the ``--use-uv`` flag to build the image with ``uv`` instead of + ``pip``. This makes rebuilding the image and iterating on your code much faster (about 50%), but if you + built your PROD image without the ``--use-uv`` flag the first build might take a bit longer. You can also switch + to using a ``pip``-based image by specifying the ``--no-use-uv`` flag together with ``--rebuild-base-image``. .. code-block:: bash diff --git a/dev/breeze/doc/images/output_ci-image_build.svg b/dev/breeze/doc/images/output_ci-image_build.svg index f617eb05e72aa..48b98e5904acc 100644 --- a/dev/breeze/doc/images/output_ci-image_build.svg +++ b/dev/breeze/doc/images/output_ci-image_build.svg @@ -390,7 +390,7 @@ --python-image  If specified this is the base python image used to build the image. Should be something like: python:VERSION-slim-bookworm. 
(TEXT) ---use-uv/--no-use-uv  Use uv instead of pip as packaging tool. +--use-uv/--no-use-uv  Use uv instead of pip as packaging tool to build the image.  [default: use-uv] ╰──────────────────────────────────────────────────────────────────────╯ ╭─ Selecting constraint location (for power users) ─────────────────────╮ --airflow-constraints-location  Location of airflow constraints to use (remote URL or local context file).  (TEXT) diff --git a/dev/breeze/doc/images/output_ci-image_build.txt b/dev/breeze/doc/images/output_ci-image_build.txt index b59fb9fc0b24d..d32ec3ed79812 100644 --- a/dev/breeze/doc/images/output_ci-image_build.txt +++ b/dev/breeze/doc/images/output_ci-image_build.txt @@ -1 +1 @@ -f535999147ac00393852eb3b28d7125b +775924a9beade1c361b7b0d127e21321 diff --git a/dev/breeze/doc/images/output_k8s_build-k8s-image.svg b/dev/breeze/doc/images/output_k8s_build-k8s-image.svg index 362f24d0e8360..c54802ce41769 100644 --- a/dev/breeze/doc/images/output_k8s_build-k8s-image.svg +++ b/dev/breeze/doc/images/output_k8s_build-k8s-image.svg @@ -1,4 +1,4 @@ - + ╭─ Build image flags ───────────────────────────────────────────────────╮ --python -p  Python major/minor version used in Airflow image for images.  (>3.8< | 3.9 | 3.10 | 3.11)  [default: 3.8] ---rebuild-base-image  Rebuilds base Airflow image before building K8S image. ---image-tag -t  Image tag used to build K8S image from.  (TEXT)  [default: latest] -╰───────────────────────────────────────────────────────────────────────╯ -╭─ Parallel options ─────────────────────────────────────────────────────╮ ---run-in-parallel  Run the operation in parallel on all or selected subset of parameters. ---parallelism  Maximum number of processes to use while running the operation in parallel.  (INTEGER RANGE)  [default: 4; 1<=x<=8] ---python-versions  Space separated list of python versions used for build with multiple versions.  (TEXT)  [default: 3.8 3.9 3.10 3.11] ---skip-cleanup  Skip cleanup of temporary files created during parallel run. ---debug-resources  Whether to show resource information while running in parallel. ---include-success-outputs  Whether to include outputs of successful parallel runs (skipped by default). -╰───────────────────────────────────────────────────────────────────────╯ -╭─ Common options ───────────────────────────────────────────────────────╮ ---verbose -v  Print verbose information about performed steps. ---dry-run -D  If dry-run is set, commands are only printed, not executed. ---help -h  Show this message and exit. -╰───────────────────────────────────────────────────────────────────────╯ +--image-tag -t  Image tag used to build K8S image from.  (TEXT)  [default: latest] +--rebuild-base-image  Rebuilds base Airflow image before building K8S image. +--use-uv/--no-use-uv  Use uv instead of pip as packaging tool to build the image.  [default: use-uv] +╰───────────────────────────────────────────────────────────────────────╯ +╭─ Parallel options ─────────────────────────────────────────────────────╮ +--run-in-parallel  Run the operation in parallel on all or selected subset of parameters. +--parallelism  Maximum number of processes to use while running the operation in parallel.  (INTEGER RANGE)  [default: 4; 1<=x<=8] +--python-versions  Space separated list of python versions used for build with multiple versions.  (TEXT)  [default: 3.8 3.9 3.10 3.11] +--skip-cleanup  Skip cleanup of temporary files created during parallel run. +--debug-resources  Whether to show resource information while running in parallel. +--include-success-outputs  Whether to include outputs of successful parallel runs (skipped by default). +╰───────────────────────────────────────────────────────────────────────╯ +╭─ Common options ───────────────────────────────────────────────────────╮ +--answer -a  Force answer to questions.  (y | n | q | yes | no | quit) +--dry-run -D  If dry-run is set, commands are only printed, not executed. +--verbose -v  Print verbose information about performed steps. +--help -h  Show this message and exit. +╰───────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_k8s_build-k8s-image.txt b/dev/breeze/doc/images/output_k8s_build-k8s-image.txt index ee7491ec5c0d3..c375b65d68cf8 100644 --- a/dev/breeze/doc/images/output_k8s_build-k8s-image.txt +++ b/dev/breeze/doc/images/output_k8s_build-k8s-image.txt @@ -1 +1 @@ -b625255c3e8f3f794ee404f9a4476836 +2910945bcb00e309668da7a64773b859 diff --git a/dev/breeze/doc/images/output_k8s_run-complete-tests.svg b/dev/breeze/doc/images/output_k8s_run-complete-tests.svg index b41a7cd71fcdf..e981bbfa58865 100644 --- a/dev/breeze/doc/images/output_k8s_run-complete-tests.svg +++ b/dev/breeze/doc/images/output_k8s_run-complete-tests.svg @@ -1,4 +1,4 @@ - +
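To close out, a short usage sketch of how the switch surfaces at build time (the image tag below is illustrative; the --use-uv/--no-use-uv flags and the AIRFLOW_USE_UV build arg are the ones shown in the diffs above):

    # Breeze image builds now default to uv, per the --use-uv/--no-use-uv help text:
    breeze ci-image build --use-uv        # uv-based build (the new default)
    breeze ci-image build --no-use-uv     # pip-based build, which the new build-prod-images-pip
                                          # CI job keeps exercising via USE_UV: "false"
    # Building the image directly with docker toggles the same behaviour through a build arg:
    docker build . --build-arg AIRFLOW_USE_UV="true" --tag my-airflow:uv-enabled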