From 010855e6f0f1ebb7d855f631a311c23c1b21a64b Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Mon, 29 Jul 2024 15:38:40 +0200 Subject: [PATCH 01/23] WIP --- src/zenml/config/build_configuration.py | 56 ++- src/zenml/config/docker_settings.py | 62 ++- .../base_entrypoint_configuration.py | 45 +- src/zenml/enums.py | 9 + .../models/v2/core/pipeline_deployment.py | 50 +-- src/zenml/models/v2/misc/build_item.py | 24 +- src/zenml/new/pipelines/build_utils.py | 421 +++++++++++++++++- src/zenml/new/pipelines/pipeline.py | 25 +- .../utils/pipeline_docker_image_builder.py | 4 +- .../schemas/pipeline_deployment_schemas.py | 3 + 10 files changed, 607 insertions(+), 92 deletions(-) diff --git a/src/zenml/config/build_configuration.py b/src/zenml/config/build_configuration.py index 9fcc7ee0145..b812312c27a 100644 --- a/src/zenml/config/build_configuration.py +++ b/src/zenml/config/build_configuration.py @@ -60,7 +60,11 @@ def compute_settings_checksum( The checksum. """ hash_ = hashlib.md5() # nosec - hash_.update(self.settings.model_dump_json().encode()) + hash_.update( + self.settings.model_dump_json( + exclude={"prevent_build_reuse"} + ).encode() + ) if self.entrypoint: hash_.update(self.entrypoint.encode()) @@ -72,7 +76,7 @@ def compute_settings_checksum( PipelineDockerImageBuilder, ) - pass_code_repo = self.should_download_files( + pass_code_repo = self.should_download_files_from_code_repository( code_repository=code_repository ) requirements_files = ( @@ -101,13 +105,10 @@ def should_include_files( Returns: Whether files should be included in the image. 
""" - if self.settings.source_files == SourceFileMode.INCLUDE: - return True + if self.should_download_files(code_repository=code_repository): + return False - if ( - self.settings.source_files == SourceFileMode.DOWNLOAD_OR_INCLUDE - and not code_repository - ): + if SourceFileMode.INCLUDE in self.settings.source_files: return True return False @@ -125,10 +126,37 @@ def should_download_files( Returns: Whether files should be downloaded in the image. """ - if not code_repository: - return False + if self.should_download_files_from_code_repository( + code_repository=code_repository + ): + return True + + if ( + SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE + in self.settings.source_files + ): + return True + + return False - return self.settings.source_files in { - SourceFileMode.DOWNLOAD, - SourceFileMode.DOWNLOAD_OR_INCLUDE, - } + def should_download_files_from_code_repository( + self, + code_repository: Optional["BaseCodeRepository"], + ) -> bool: + """Whether files should be downloaded from the code repository. + + Args: + code_repository: Code repository that can be used to download files + inside the image. + + Returns: + Whether files should be downloaded from the code repository. 
+ """ + if ( + code_repository + and SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY + in self.settings.source_files + ): + return True + + return False diff --git a/src/zenml/config/docker_settings.py b/src/zenml/config/docker_settings.py index 083d564fdd5..60dc1ae6e91 100644 --- a/src/zenml/config/docker_settings.py +++ b/src/zenml/config/docker_settings.py @@ -14,7 +14,7 @@ """Docker settings.""" from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Set, Union from pydantic import BaseModel, Field, model_validator from pydantic_settings import SettingsConfigDict @@ -53,9 +53,15 @@ class SourceFileMode(Enum): """Different methods to handle source files in Docker images.""" INCLUDE = "include" - DOWNLOAD_OR_INCLUDE = "download_or_include" - DOWNLOAD = "download" - IGNORE = "ignore" + DOWNLOAD_FROM_CODE_REPOSITORY = "download_from_code_repository" + DOWNLOAD_FROM_ARTIFACT_STORE = "download_from_artifact_store" + + +DEFAULT_SOURCE_FILE_MODE = { + SourceFileMode.INCLUDE, + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, + SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE, +} class PythonPackageInstaller(Enum): @@ -139,6 +145,7 @@ class DockerSettings(BaseSettings): instead. skip_build: If set to `True`, the parent image will be used directly to run the steps of your pipeline. + prevent_build_reuse: Prevent the reuse of an existing build. target_repository: Name of the Docker repository to which the image should be pushed. This repository will be appended to the registry URI of the container registry of your stack and should @@ -177,7 +184,7 @@ class DockerSettings(BaseSettings): Docker image. build_config: Configuration for the main image build. dockerignore: DEPRECATED, use build_config.dockerignore instead. - copy_files: DEPRECATED, use the `source_files` attribute instead. + copy_files: DEPRECATED/UNUSED. copy_global_config: DEPRECATED/UNUSED. 
user: If not `None`, will set the user, make it owner of the `/app` directory which contains all the user code and run the container @@ -203,6 +210,7 @@ class DockerSettings(BaseSettings): build_options: Dict[str, Any] = {} parent_image_build_config: Optional[DockerBuildConfig] = None skip_build: bool = False + prevent_build_reuse: bool = False target_repository: Optional[str] = None python_package_installer: PythonPackageInstaller = ( PythonPackageInstaller.PIP @@ -225,7 +233,7 @@ class DockerSettings(BaseSettings): user: Optional[str] = None build_config: Optional[DockerBuildConfig] = None - source_files: SourceFileMode = SourceFileMode.DOWNLOAD_OR_INCLUDE + source_files: Set[SourceFileMode] = DEFAULT_SOURCE_FILE_MODE _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes( "copy_files", "copy_global_config" @@ -234,30 +242,38 @@ class DockerSettings(BaseSettings): @model_validator(mode="before") @classmethod @before_validator_handler - def _migrate_copy_files(cls, data: Dict[str, Any]) -> Dict[str, Any]: - """Migrates the value from the old copy_files attribute. + def _migrate_source_files(cls, data: Dict[str, Any]) -> Dict[str, Any]: + """Migrates the value from the old source_files attributes. Args: data: The settings values. + Raises: + ValueError: If an invalid source file mode is specified. + Returns: The migrated settings values. """ - copy_files = data.get("copy_files", None) - - if copy_files is None: - return data - - if data.get("source_files", None): - # Ignore the copy files value in favor of the new source files - logger.warning( - "Both `copy_files` and `source_files` specified for the " - "DockerSettings, ignoring the `copy_files` value." 
- ) - elif copy_files is True: - data["source_files"] = SourceFileMode.INCLUDE - elif copy_files is False: - data["source_files"] = SourceFileMode.IGNORE + source_files = data.get("source_files", None) + + if isinstance(source_files, str): + if source_files == "download": + new_source_files = { + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY + } + elif source_files == "download_or_include": + new_source_files = { + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, + SourceFileMode.INCLUDE, + } + elif source_files == "ignore": + new_source_files = set() + elif source_files == "include": + new_source_files = {SourceFileMode.INCLUDE} + else: + raise ValueError(f"Invalid source file mode `{source_files}`.") + + data["source_files"] = new_source_files return data diff --git a/src/zenml/entrypoints/base_entrypoint_configuration.py b/src/zenml/entrypoints/base_entrypoint_configuration.py index 64472087cd6..2fd25d306d4 100644 --- a/src/zenml/entrypoints/base_entrypoint_configuration.py +++ b/src/zenml/entrypoints/base_entrypoint_configuration.py @@ -15,6 +15,7 @@ import argparse import os +import shutil import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Set @@ -26,11 +27,12 @@ ENV_ZENML_REQUIRES_CODE_DOWNLOAD, handle_bool_env_var, ) +from zenml.io import fileio from zenml.logger import get_logger from zenml.utils import code_repository_utils, source_utils, uuid_utils if TYPE_CHECKING: - from zenml.models import PipelineDeploymentResponse + from zenml.models import CodeReferenceResponse, PipelineDeploymentResponse logger = get_logger(__name__) DEFAULT_ENTRYPOINT_COMMAND = [ @@ -207,17 +209,28 @@ def download_code_if_necessary( if not requires_code_download: return - code_reference = deployment.code_reference - if not code_reference: + if code_reference := deployment.code_reference: + self.download_code_from_code_repository( + code_reference=code_reference + ) + elif code_path := deployment.code_path: + 
self.download_code_from_artifact_store(code_path=code_path) + else: raise RuntimeError( - "Code download required but no code reference provided." + "Code download required but no code reference or path provided." ) + logger.info("Code download finished.") + + def download_code_from_code_repository( + self, code_reference: "CodeReferenceResponse" + ) -> None: logger.info( "Downloading code from code repository `%s` (commit `%s`).", code_reference.code_repository.name, code_reference.commit, ) + model = Client().get_code_repository(code_reference.code_repository.id) repo = BaseCodeRepository.from_model(model) code_repo_root = os.path.abspath("code") @@ -237,7 +250,29 @@ def download_code_if_necessary( # Add downloaded file directory to python path sys.path.insert(0, download_dir) - logger.info("Code download finished.") + def download_code_from_artifact_store(self, code_path: str) -> None: + logger.info( + "Downloading code from artifact store path `%s`.", code_path + ) + + # Do not remove this line, we need to instantiate the artifact store to + # register the filesystem needed for the file download + artifact_store = Client().active_stack.artifact_store + + if not code_path.startswith(artifact_store.path): + raise RuntimeError("Code stored in different artifact store.") + + extract_dir = os.path.abspath("code") + os.makedirs(extract_dir) + + download_path = "code.tar" + fileio.copy(code_path, download_path) + + shutil.unpack_archive(filename=download_path, extract_dir=extract_dir) + os.remove(download_path) + + source_utils.set_custom_source_root(extract_dir) + sys.path.insert(0, extract_dir) @abstractmethod def run(self) -> None: diff --git a/src/zenml/enums.py b/src/zenml/enums.py index 2df9e6af793..4b1b7997b80 100644 --- a/src/zenml/enums.py +++ b/src/zenml/enums.py @@ -410,3 +410,12 @@ class StackDeploymentProvider(StrEnum): AWS = "aws" GCP = "gcp" AZURE = "azure" + + +class RequirementType(StrEnum): + UNKNOWN = "unknown" + STACK = "stack" + USER = "user" # 
maybe pipeline? + INTEGRATION = "integration" + LOCAL_ENVIRONMENT = "local_environment" + CODE_REPOSITORY = "code_repository" diff --git a/src/zenml/models/v2/core/pipeline_deployment.py b/src/zenml/models/v2/core/pipeline_deployment.py index f12edd9e0d6..760f65f1a35 100644 --- a/src/zenml/models/v2/core/pipeline_deployment.py +++ b/src/zenml/models/v2/core/pipeline_deployment.py @@ -18,7 +18,6 @@ from pydantic import Field -from zenml.config.docker_settings import SourceFileMode from zenml.config.pipeline_configurations import PipelineConfiguration from zenml.config.pipeline_spec import PipelineSpec from zenml.config.step_configurations import Step @@ -82,26 +81,14 @@ class PipelineDeploymentBase(BaseZenModel): ) @property - def requires_included_files(self) -> bool: - """Whether the deployment requires included files. + def should_prevent_build_reuse(self) -> bool: + """Whether the deployment prevents a build reuse. Returns: - Whether the deployment requires included files. + Whether the deployment prevents a build reuse. """ return any( - step.config.docker_settings.source_files == SourceFileMode.INCLUDE - for step in self.step_configurations.values() - ) - - @property - def requires_code_download(self) -> bool: - """Whether the deployment requires downloading some code files. - - Returns: - Whether the deployment requires downloading some code files. 
- """ - return any( - step.config.docker_settings.source_files == SourceFileMode.DOWNLOAD + step.config.docker_settings.prevent_build_reuse for step in self.step_configurations.values() ) @@ -125,6 +112,10 @@ class PipelineDeploymentRequest( default=None, title="The code reference associated with the deployment.", ) + code_path: Optional[str] = Field( + default=None, + title="Optional path where the code is stored in the artifact store.", + ) template: Optional[UUID] = Field( default=None, description="Template used for the deployment.", @@ -169,6 +160,10 @@ class PipelineDeploymentResponseMetadata(WorkspaceScopedResponseMetadata): pipeline_spec: Optional[PipelineSpec] = Field( default=None, title="The pipeline spec of the deployment." ) + code_path: Optional[str] = Field( + default=None, + title="Optional path where the code is stored in the artifact store.", + ) pipeline: Optional[PipelineResponse] = Field( default=None, title="The pipeline associated with the deployment." @@ -293,6 +288,15 @@ def pipeline_spec(self) -> Optional[PipelineSpec]: """ return self.get_metadata().pipeline_spec + @property + def code_path(self) -> Optional[str]: + """The `code_path` property. + + Returns: + the value of the property. + """ + return self.get_metadata().code_path + @property def pipeline(self) -> Optional[PipelineResponse]: """The `pipeline` property. @@ -347,18 +351,6 @@ def template_id(self) -> Optional[UUID]: """ return self.get_metadata().template_id - @property - def requires_code_download(self) -> bool: - """Whether the deployment requires downloading some code files. - - Returns: - Whether the deployment requires downloading some code files. 
- """ - return any( - step.config.docker_settings.source_files == SourceFileMode.DOWNLOAD - for step in self.step_configurations.values() - ) - # ------------------ Filter Model ------------------ diff --git a/src/zenml/models/v2/misc/build_item.py b/src/zenml/models/v2/misc/build_item.py index 13d35ddefd9..1f288219ee3 100644 --- a/src/zenml/models/v2/misc/build_item.py +++ b/src/zenml/models/v2/misc/build_item.py @@ -13,9 +13,12 @@ # permissions and limitations under the License. """Model definition for pipeline build item.""" -from typing import Optional +import itertools +from typing import Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator + +from zenml.enums import RequirementType class BuildItem(BaseModel): @@ -38,6 +41,9 @@ class BuildItem(BaseModel): requirements: Optional[str] = Field( default=None, title="The pip requirements installed in the image." ) + pypi_requirements: Dict[RequirementType, List[str]] = {} + apt_requirements: Dict[RequirementType, List[str]] = {} + settings_checksum: Optional[str] = Field( default=None, title="The checksum of the build settings." ) @@ -47,3 +53,17 @@ class BuildItem(BaseModel): requires_code_download: bool = Field( default=False, title="Whether the image needs to download files." 
) + + @model_validator(mode="after") + def _build_item_validator(self) -> "BuildItem": + if not self.pypi_requirements: + if self.requirements: + self.pypi_requirements = { + RequirementType.UNKNOWN: self.requirements.splitlines() + } + elif not self.requirements: + self.requirements = "\n".join( + itertools.chain.from_iterable(self.pypi_requirements.values()) + ) + + return self diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index fdde3e9c089..071d88aabc7 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -14,22 +14,32 @@ """Pipeline build utilities.""" import hashlib +import os import platform +import tempfile +from pathlib import Path from typing import ( + IO, TYPE_CHECKING, Dict, List, Optional, + Tuple, Union, ) from uuid import UUID +from git.repo.base import Repo + import zenml from zenml.client import Client from zenml.code_repositories import BaseCodeRepository +from zenml.config.docker_settings import SourceFileMode +from zenml.io import fileio from zenml.logger import get_logger from zenml.models import ( BuildItem, + CodeReferenceRequest, PipelineBuildBase, PipelineBuildRequest, PipelineBuildResponse, @@ -39,6 +49,7 @@ from zenml.stack import Stack from zenml.utils import ( source_utils, + string_utils, ) from zenml.utils.pipeline_docker_image_builder import ( PipelineDockerImageBuilder, @@ -64,6 +75,59 @@ def build_required(deployment: "PipelineDeploymentBase") -> bool: return bool(stack.get_docker_builds(deployment=deployment)) +def requires_included_code( + deployment: "PipelineDeploymentBase", + code_repository: Optional["BaseCodeRepository"] = None, +) -> bool: + """Checks whether the deployment requires included code. + + Args: + deployment: The deployment. + code_repository: If provided, this code repository can be used to + download the code inside the container images. + + Returns: + If the deployment requires code included in the container images. 
+ """ + for step in deployment.step_configurations.values(): + if step.config.docker_settings.source_files == { + SourceFileMode.INCLUDE + }: + return True + + if ( + step.config.docker_settings.source_files + == { + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, + SourceFileMode.INCLUDE, + } + and not code_repository + ): + return True + + return False + + +def requires_download_from_code_repository( + deployment: "PipelineDeploymentBase", +) -> bool: + """Checks whether the deployment needs to download code from a repository. + + Args: + deployment: The deployment. + + Returns: + If the deployment needs to download code from a code repository. + """ + return any( + step.config.docker_settings.source_files + == { + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, + } + for step in deployment.step_configurations.values() + ) + + def reuse_or_create_pipeline_build( deployment: "PipelineDeploymentBase", allow_build_reuse: bool, @@ -82,8 +146,8 @@ def reuse_or_create_pipeline_build( build: Optional existing build. If given, the build will be fetched (or registered) in the database. If not given, a new build will be created. - code_repository: If provided, this code repository will be used to - download inside the build images. + code_repository: If provided, this code repository can be used to + download code inside the container images. Returns: The build response. @@ -91,8 +155,9 @@ def reuse_or_create_pipeline_build( if not build: if ( allow_build_reuse - and code_repository - and not deployment.requires_included_files + and not requires_included_code( + deployment=deployment, code_repository=code_repository + ) and build_required(deployment=deployment) ): existing_build = find_existing_build( @@ -108,17 +173,13 @@ def reuse_or_create_pipeline_build( return existing_build else: logger.info( - "Unable to find a build to reuse. 
When using a code " - "repository, a previous build can be reused when the " - "following conditions are met:\n" + "Unable to find a build to reuse. A previous build can be " + "reused when the following conditions are met:\n" " * The existing build was created for the same stack, " "ZenML version and Python version\n" " * The stack contains a container registry\n" " * The Docker settings of the pipeline and all its steps " - "are the same as for the existing build\n" - " * The build does not include code. This will only be " - "the case if the existing build was created with a clean " - "code repository." + "are the same as for the existing build." ) return create_pipeline_build( @@ -150,7 +211,7 @@ def reuse_or_create_pipeline_build( def find_existing_build( deployment: "PipelineDeploymentBase", - code_repository: "BaseCodeRepository", + code_repository: Optional["BaseCodeRepository"] = None, ) -> Optional["PipelineBuildResponse"]: """Find an existing build for a deployment. @@ -280,6 +341,11 @@ def create_pipeline_build( download_files = build_config.should_download_files( code_repository=code_repository, ) + pass_code_repo = ( + build_config.should_download_files_from_code_repository( + code_repository=code_repository + ) + ) ( image_name_or_digest, @@ -293,7 +359,7 @@ def create_pipeline_build( download_files=download_files, entrypoint=build_config.entrypoint, extra_files=build_config.extra_files, - code_repository=code_repository, + code_repository=code_repository if pass_code_repo else None, ) contains_code = include_files @@ -389,7 +455,7 @@ def verify_local_repository_context( deployment, or None if code download is not possible. 
""" if build_required(deployment=deployment): - if deployment.requires_code_download: + if requires_download_from_code_repository(deployment=deployment): if not local_repo_context: raise RuntimeError( "The `DockerSettings` of the pipeline or one of its " @@ -561,3 +627,330 @@ def compute_stack_checksum(stack: StackResponse) -> str: hash_.update(integration.encode()) return hash_.hexdigest() + + +def should_upload_code( + deployment: PipelineDeploymentBase, + build: Optional[PipelineBuildResponse], + code_reference: Optional[CodeReferenceRequest], +) -> bool: + """Checks whether the current code should be uploaded for the deployment. + + Args: + deployment: The deployment. + build: The build for the deployment. + code_reference: The code reference for the deployment. + + Returns: + Whether the current code should be uploaded for the deployment. + """ + if not build: + # No build means all the code is getting executed locally, which means + # we don't need to download any code + # TODO: This does not apply to e.g. Databricks, figure out a solution + # here + return False + + for step in deployment.step_configurations.values(): + source_files = step.config.docker_settings.source_files + + if ( + code_reference + and SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY in source_files + ): + # No upload needed for this step + continue + + if SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE in source_files: + break + else: + # Downloading code in the Docker images is prevented by Docker settings + return False + + return True + + +class UploadContext: + def __init__( + self, + root: str, + ) -> None: + """Initializes a build context. + + Args: + root: Optional root directory for the build context. + dockerignore_file: Optional path to a dockerignore file. If not + given, a file called `.dockerignore` in the build context root + directory will be used instead if it exists. 
+ """ + self._root = root + self._extra_files: Dict[str, str] = {} + + def add_file(self, source: str, destination: str) -> None: + """Adds a file to the build context. + + Args: + source: The source of the file to add. This can either be a path + or the file content. + destination: The path inside the build context where the file + should be added. + """ + if fileio.exists(source): + with fileio.open(source) as f: + self._extra_files[destination] = f.read() + else: + self._extra_files[destination] = source + + def add_directory(self, source: str, destination: str) -> None: + """Adds a directory to the build context. + + Args: + source: Path to the directory. + destination: The path inside the build context where the directory + should be added. + + Raises: + ValueError: If `source` does not point to a directory. + """ + if not fileio.isdir(source): + raise ValueError( + f"Can't add directory {source} to the build context as it " + "does not exist or is not a directory." + ) + + for dir, _, files in fileio.walk(source): + dir_path = Path(fileio.convert_to_str(dir)) + for file_name in files: + file_name = fileio.convert_to_str(file_name) + file_source = dir_path / file_name + file_destination = ( + Path(destination) + / dir_path.relative_to(source) + / file_name + ) + + with file_source.open("r") as f: + self._extra_files[file_destination.as_posix()] = f.read() + + def write_archive(self, output_file: IO[bytes], gzip: bool = True) -> None: + """Writes an archive of the build context to the given file. + + Args: + output_file: The file to write the archive to. + gzip: Whether to use `gzip` to compress the file. 
+ """ + from docker.utils import build as docker_build_utils + + files = self._get_files() + extra_files = self._get_extra_files() + + context_archive = docker_build_utils.create_archive( + fileobj=output_file, + root=self._root, + files=files, + gzip=gzip, + extra_files=extra_files, + ) + + build_context_size = os.path.getsize(context_archive.name) + if build_context_size > 50 * 1024 * 1024: + logger.warning( + "Code upload size: `%s`. If you believe this is " + "unreasonably large, make sure to include unnecessary files in " + "a `.gitignore` file.", + string_utils.get_human_readable_filesize(build_context_size), + ) + + @property + def git_repo(self) -> Optional[Repo]: + try: + # These imports fail when git is not installed on the machine + from git.exc import InvalidGitRepositoryError + from git.repo.base import Repo + except ImportError: + return None + + try: + git_repo = Repo(path=self._root, search_parent_directories=True) + except InvalidGitRepositoryError: + return None + + return git_repo + + def _get_files(self) -> Optional[List[str]]: + if repo := self.git_repo: + try: + result = repo.git.ls_files( + "--cached", + "--others", + "--modified", + "--exclude-standard", + self._root, + ) + except Exception as e: + logger.warning( + "Failed to get non-ignored files from git: %s", str(e) + ) + else: + files = set() + for file in result.split(): + relative_path = os.path.relpath( + os.path.join(repo.working_dir, file), self._root + ) + if os.path.exists(relative_path): + files.add(relative_path) + + return sorted(files) + + return None + + def _get_extra_files(self) -> List[Tuple[str, str]]: + """Gets all extra files of the build context. + + Returns: + A tuple (path, file_content) for all extra files in the build + context. + """ + return list(self._extra_files.items()) + + +# TODO: which files to include? gitignore, dockerignore, zenignore? +def upload_code_if_necessary() -> str: + """Upload code to the artifact store if necessary. 
+ + This function computes a hash of the code to be uploaded, and if an archive + with the same hash already exists it will not re-upload but instead return + the path to the existing archive. + + Returns: + The path where to archived code is uploaded. + """ + upload_context = UploadContext(root=source_utils.get_source_root()) + artifact_store = Client().active_stack.artifact_store + + with tempfile.NamedTemporaryFile(mode="w+b", delete=True) as f: + # Don't use gzip as that includes the creation timestamp of the + # compressed tar file, which means the hash changes each time. This + # means currently the archive is not compressed, which should be + # changed. + upload_context.write_archive(f, gzip=False) + + hash_ = hashlib.sha1() # nosec + + while True: + data = f.read(64 * 1024) + if not data: + break + hash_.update(data) + + filename = f"{hash_.hexdigest()}.tar" + upload_dir = os.path.join(artifact_store.path, "code_uploads") + fileio.makedirs(upload_dir) + upload_path = os.path.join(upload_dir, filename) + + if not fileio.exists(upload_path): + logger.info("Uploading code to `%s`.", upload_path) + fileio.copy(f.name, upload_path) + logger.info("Code upload finished.") + else: + logger.debug( + "Code already exists in artifact store, not uploading." 
+ ) + + return upload_path + + +# import os +# import subprocess +# from typing import Tuple + +# from zenml.code_repositories import BaseCodeRepository +# from zenml.config.docker_settings import ( +# DockerSettings, +# PythonEnvironmentExportMethod, +# ) +# from zenml.enums import OperatingSystemType, RequirementType +# from zenml.integrations.registry import integration_registry +# from zenml.stack import Stack +# from zenml.utils import io_utils + + +# def extract_requirements( +# docker_settings: DockerSettings, +# stack: "Stack", +# code_repository: Optional["BaseCodeRepository"] = None, +# ) -> Tuple[Dict[RequirementType, List[str]], Dict[RequirementType, List[str]]]: +# pypi_requirements = {} +# apt_requirements = {} + +# if docker_settings.install_stack_requirements: +# if stack_pypi_requirements := stack.requirements(): +# pypi_requirements[RequirementType.STACK] = sorted( +# stack_pypi_requirements +# ) + +# if stack_apt_requirements := stack.apt_packages: +# apt_requirements[RequirementType.STACK] = stack_apt_requirements + +# if code_repository: +# pypi_requirements[RequirementType.CODE_REPOSITORY] = sorted( +# code_repository.requirements +# ) + +# if docker_settings.replicate_local_python_environment: +# if isinstance( +# docker_settings.replicate_local_python_environment, +# PythonEnvironmentExportMethod, +# ): +# command = ( +# docker_settings.replicate_local_python_environment.command +# ) +# else: +# command = " ".join( +# docker_settings.replicate_local_python_environment +# ) + +# try: +# local_requirements = subprocess.check_output( +# command, +# shell=True, # nosec +# ).decode() +# except subprocess.CalledProcessError as e: +# raise RuntimeError( +# "Unable to export local python packages." 
+# ) from e + +# pypi_requirements[RequirementType.LOCAL_ENVIRONMENT] = ( +# local_requirements.splitlines() +# ) + +# if docker_settings.required_integrations: +# for integration_name in docker_settings.required_integrations: +# integration = integration_registry.integrations[integration_name] +# pypi_requirements[RequirementType.INTEGRATION] = ( +# integration.get_requirements( +# target_os=OperatingSystemType.LINUX +# ) +# ) +# apt_requirements[RequirementType.INTEGRATION] = ( +# integration.APT_PACKAGES +# ) + +# if isinstance(docker_settings.requirements, str): +# path = os.path.abspath(docker_settings.requirements) +# try: +# user_requirements = io_utils.read_file_contents_as_string(path) +# except FileNotFoundError as e: +# raise FileNotFoundError( +# f"Requirements file {path} does not exist." +# ) from e + +# pypi_requirements[RequirementType.USER] = ( +# user_requirements.splitlines() +# ) +# elif isinstance(docker_settings.requirements, List): +# pypi_requirements[RequirementType.USER] = docker_settings.requirements + +# apt_requirements[RequirementType.USER] = docker_settings.apt_packages + +# return pypi_requirements, apt_requirements diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 78c9abfc622..455f0aa58d4 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -579,7 +579,8 @@ def _run( method. unlisted: Whether the pipeline run should be unlisted (not assigned to any pipeline). - prevent_build_reuse: Whether to prevent the reuse of a build. + prevent_build_reuse: DEPRECATED: Use + `DockerSettings.prevent_build_reuse` instead. Returns: Model of the pipeline run if running without a schedule, `None` if @@ -677,6 +678,16 @@ def _run( deployment=deployment, local_repo_context=local_repo_context ) + if prevent_build_reuse: + logger.warning( + "Passing `prevent_build_reuse=True` to " + "`pipeline.with_opitions(...)` is deprecated. 
Use " + "`DockerSettings.prevent_build_reuse` instead." + ) + + prevent_build_reuse = ( + prevent_build_reuse or deployment.should_prevent_build_reuse + ) build_model = build_utils.reuse_or_create_pipeline_build( deployment=deployment, pipeline_id=pipeline_id, @@ -701,6 +712,14 @@ def _run( code_repository=local_repo_context.code_repository_id, ) + code_path = None + if build_utils.should_upload_code( + deployment=deployment, + build=build_model, + code_reference=code_reference, + ): + code_path = build_utils.upload_code_if_necessary() + deployment_request = PipelineDeploymentRequest( user=Client().active_user.id, workspace=Client().active_workspace.id, @@ -709,6 +728,7 @@ def _run( build=build_id, schedule=schedule_id, code_reference=code_reference, + code_path=code_path, **deployment.model_dump(), ) deployment_model = Client().zen_store.create_deployment( @@ -1271,7 +1291,8 @@ def with_options( method. unlisted: Whether the pipeline run should be unlisted (not assigned to any pipeline). - prevent_build_reuse: Whether to prevent the reuse of a build. + prevent_build_reuse: DEPRECATED: Use + `DockerSettings.prevent_build_reuse` instead. **kwargs: Pipeline configuration options. These will be passed to the `pipeline.configure(...)` method. 
diff --git a/src/zenml/utils/pipeline_docker_image_builder.py b/src/zenml/utils/pipeline_docker_image_builder.py index d5c43c9e5bc..1f4a3ee4522 100644 --- a/src/zenml/utils/pipeline_docker_image_builder.py +++ b/src/zenml/utils/pipeline_docker_image_builder.py @@ -277,9 +277,7 @@ def build_docker_image( requirements_files = self.gather_requirements_files( docker_settings=docker_settings, stack=stack, - # Only pass code repo to include its dependencies if we actually - # need to download code - code_repository=code_repository if download_files else None, + code_repository=code_repository, ) self._add_requirements_files( diff --git a/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py b/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py index 6b8c5ef93c1..ae2fe609bcf 100644 --- a/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py +++ b/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py @@ -84,6 +84,7 @@ class PipelineDeploymentSchema(BaseSchema, table=True): nullable=True, ) ) + code_path: Optional[str] = Field(nullable=True) # Foreign keys user_id: Optional[UUID] = build_foreign_key_field( @@ -207,6 +208,7 @@ def from_request( ) if request.pipeline_spec else None, + code_path=request.code_path, ) def to_model( @@ -261,6 +263,7 @@ def to_model( ) if self.pipeline_spec else None, + code_path=self.code_path, template_id=self.template_id, ) return PipelineDeploymentResponse( From 1710e22d2a7a2b6cf327cbc405377d1553a4d470 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Tue, 30 Jul 2024 16:46:42 +0200 Subject: [PATCH 02/23] Docstrings --- .../entrypoints/base_entrypoint_configuration.py | 16 +++++++++++++++- src/zenml/enums.py | 4 +++- src/zenml/models/v2/misc/build_item.py | 9 ++++++++- src/zenml/new/pipelines/build_utils.py | 8 +++++++- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/zenml/entrypoints/base_entrypoint_configuration.py b/src/zenml/entrypoints/base_entrypoint_configuration.py index 
2fd25d306d4..284d83b2731 100644 --- a/src/zenml/entrypoints/base_entrypoint_configuration.py +++ b/src/zenml/entrypoints/base_entrypoint_configuration.py @@ -200,7 +200,7 @@ def download_code_if_necessary( Raises: RuntimeError: If the current environment requires code download - but the deployment does not have an associated code reference. + but the deployment does not have a reference to any code. """ requires_code_download = handle_bool_env_var( ENV_ZENML_REQUIRES_CODE_DOWNLOAD @@ -225,6 +225,11 @@ def download_code_if_necessary( def download_code_from_code_repository( self, code_reference: "CodeReferenceResponse" ) -> None: + """Download code from a code repository. + + Args: + code_reference: The reference to the code. + """ logger.info( "Downloading code from code repository `%s` (commit `%s`).", code_reference.code_repository.name, @@ -251,6 +256,15 @@ def download_code_from_code_repository( sys.path.insert(0, download_dir) def download_code_from_artifact_store(self, code_path: str) -> None: + """Download code from the artifact store. + + Args: + code_path: Path where the code is stored. + + Raises: + RuntimeError: If the code is stored in an artifact store which is + not active. + """ logger.info( "Downloading code from artifact store path `%s`.", code_path ) diff --git a/src/zenml/enums.py b/src/zenml/enums.py index 4b1b7997b80..40b2ddaa562 100644 --- a/src/zenml/enums.py +++ b/src/zenml/enums.py @@ -413,9 +413,11 @@ class StackDeploymentProvider(StrEnum): class RequirementType(StrEnum): + """All requirement types.""" + UNKNOWN = "unknown" STACK = "stack" - USER = "user" # maybe pipeline? 
+ USER = "user" INTEGRATION = "integration" LOCAL_ENVIRONMENT = "local_environment" CODE_REPOSITORY = "code_repository" diff --git a/src/zenml/models/v2/misc/build_item.py b/src/zenml/models/v2/misc/build_item.py index 1f288219ee3..1c2ae44452e 100644 --- a/src/zenml/models/v2/misc/build_item.py +++ b/src/zenml/models/v2/misc/build_item.py @@ -29,6 +29,8 @@ class BuildItem(BaseModel): dockerfile: The contents of the Dockerfile used to build the image. requirements: The pip requirements installed in the image. This is a string consisting of multiple concatenated requirements.txt files. + pypi_requirements: PyPI requirements included in the image. + apt_requirements: Apt requirements included in the image. settings_checksum: Checksum of the settings used for the build. contains_code: Whether the image contains user files. requires_code_download: Whether the image needs to download files. @@ -55,7 +57,12 @@ class BuildItem(BaseModel): ) @model_validator(mode="after") - def _build_item_validator(self) -> "BuildItem": + def _migrate_requirements(self) -> "BuildItem": + """Migrate PyPI requirements. + + Returns: + The build item with migrated requirements. + """ if not self.pypi_requirements: if self.requirements: self.pypi_requirements = { diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 071d88aabc7..5dd7ce64c32 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -671,6 +671,8 @@ def should_upload_code( class UploadContext: + """Upload context.""" + def __init__( self, root: str, @@ -763,6 +765,11 @@ def write_archive(self, output_file: IO[bytes], gzip: bool = True) -> None: @property def git_repo(self) -> Optional[Repo]: + """Git repository active at the upload context root. + + Returns: + The optional git repository active at the upload context root. 
+ """ try: # These imports fail when git is not installed on the machine from git.exc import InvalidGitRepositoryError @@ -814,7 +821,6 @@ def _get_extra_files(self) -> List[Tuple[str, str]]: return list(self._extra_files.items()) -# TODO: which files to include? gitignore, dockerignore, zenignore? def upload_code_if_necessary() -> str: """Upload code to the artifact store if necessary. From 2c8e98e329117c5c790499be26927c3a08a700c7 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Tue, 30 Jul 2024 16:53:54 +0200 Subject: [PATCH 03/23] Add DB migration --- .../versions/026d4577b6a0_add_code_path.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py diff --git a/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py b/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py new file mode 100644 index 00000000000..1111b89477b --- /dev/null +++ b/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py @@ -0,0 +1,39 @@ +"""Add code path [026d4577b6a0]. + +Revision ID: 026d4577b6a0 +Revises: 0.63.0 +Create Date: 2024-07-30 16:53:32.777594 + +""" + +import sqlalchemy as sa +import sqlmodel +from alembic import op + +# revision identifiers, used by Alembic. +revision = "026d4577b6a0" +down_revision = "0.63.0" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade database schema and/or data, creating a new revision.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("pipeline_deployment", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "code_path", sqlmodel.sql.sqltypes.AutoString(), nullable=True + ) + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table("pipeline_deployment", schema=None) as batch_op: + batch_op.drop_column("code_path") + + # ### end Alembic commands ### From 75ab1570509208eb84e6ce6c532512ffde8686a5 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 09:58:34 +0200 Subject: [PATCH 04/23] Add archivable superclass --- .../base_entrypoint_configuration.py | 2 +- src/zenml/new/pipelines/build_utils.py | 169 +----------------- src/zenml/new/pipelines/code_archive.py | 121 +++++++++++++ src/zenml/new/pipelines/pipeline.py | 3 - src/zenml/utils/archivable.py | 146 +++++++++++++++ 5 files changed, 273 insertions(+), 168 deletions(-) create mode 100644 src/zenml/new/pipelines/code_archive.py create mode 100644 src/zenml/utils/archivable.py diff --git a/src/zenml/entrypoints/base_entrypoint_configuration.py b/src/zenml/entrypoints/base_entrypoint_configuration.py index 284d83b2731..753e6076fbe 100644 --- a/src/zenml/entrypoints/base_entrypoint_configuration.py +++ b/src/zenml/entrypoints/base_entrypoint_configuration.py @@ -279,7 +279,7 @@ def download_code_from_artifact_store(self, code_path: str) -> None: extract_dir = os.path.abspath("code") os.makedirs(extract_dir) - download_path = "code.tar" + download_path = os.path.basename(code_path) fileio.copy(code_path, download_path) shutil.unpack_archive(filename=download_path, extract_dir=extract_dir) diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 5dd7ce64c32..226751e523a 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -17,20 +17,15 @@ import os import platform import tempfile -from pathlib import Path from typing import ( - IO, TYPE_CHECKING, Dict, List, Optional, - Tuple, Union, ) from uuid import UUID -from git.repo.base import Repo - import zenml from zenml.client import Client from zenml.code_repositories import BaseCodeRepository @@ -46,10 +41,10 @@ PipelineDeploymentBase, StackResponse, ) +from 
zenml.new.pipelines.code_archive import CodeArchive from zenml.stack import Stack from zenml.utils import ( source_utils, - string_utils, ) from zenml.utils.pipeline_docker_image_builder import ( PipelineDockerImageBuilder, @@ -155,6 +150,7 @@ def reuse_or_create_pipeline_build( if not build: if ( allow_build_reuse + and not deployment.should_prevent_build_reuse and not requires_included_code( deployment=deployment, code_repository=code_repository ) @@ -670,157 +666,6 @@ def should_upload_code( return True -class UploadContext: - """Upload context.""" - - def __init__( - self, - root: str, - ) -> None: - """Initializes a build context. - - Args: - root: Optional root directory for the build context. - dockerignore_file: Optional path to a dockerignore file. If not - given, a file called `.dockerignore` in the build context root - directory will be used instead if it exists. - """ - self._root = root - self._extra_files: Dict[str, str] = {} - - def add_file(self, source: str, destination: str) -> None: - """Adds a file to the build context. - - Args: - source: The source of the file to add. This can either be a path - or the file content. - destination: The path inside the build context where the file - should be added. - """ - if fileio.exists(source): - with fileio.open(source) as f: - self._extra_files[destination] = f.read() - else: - self._extra_files[destination] = source - - def add_directory(self, source: str, destination: str) -> None: - """Adds a directory to the build context. - - Args: - source: Path to the directory. - destination: The path inside the build context where the directory - should be added. - - Raises: - ValueError: If `source` does not point to a directory. - """ - if not fileio.isdir(source): - raise ValueError( - f"Can't add directory {source} to the build context as it " - "does not exist or is not a directory." 
- ) - - for dir, _, files in fileio.walk(source): - dir_path = Path(fileio.convert_to_str(dir)) - for file_name in files: - file_name = fileio.convert_to_str(file_name) - file_source = dir_path / file_name - file_destination = ( - Path(destination) - / dir_path.relative_to(source) - / file_name - ) - - with file_source.open("r") as f: - self._extra_files[file_destination.as_posix()] = f.read() - - def write_archive(self, output_file: IO[bytes], gzip: bool = True) -> None: - """Writes an archive of the build context to the given file. - - Args: - output_file: The file to write the archive to. - gzip: Whether to use `gzip` to compress the file. - """ - from docker.utils import build as docker_build_utils - - files = self._get_files() - extra_files = self._get_extra_files() - - context_archive = docker_build_utils.create_archive( - fileobj=output_file, - root=self._root, - files=files, - gzip=gzip, - extra_files=extra_files, - ) - - build_context_size = os.path.getsize(context_archive.name) - if build_context_size > 50 * 1024 * 1024: - logger.warning( - "Code upload size: `%s`. If you believe this is " - "unreasonably large, make sure to include unnecessary files in " - "a `.gitignore` file.", - string_utils.get_human_readable_filesize(build_context_size), - ) - - @property - def git_repo(self) -> Optional[Repo]: - """Git repository active at the upload context root. - - Returns: - The optional git repository active at the upload context root. 
- """ - try: - # These imports fail when git is not installed on the machine - from git.exc import InvalidGitRepositoryError - from git.repo.base import Repo - except ImportError: - return None - - try: - git_repo = Repo(path=self._root, search_parent_directories=True) - except InvalidGitRepositoryError: - return None - - return git_repo - - def _get_files(self) -> Optional[List[str]]: - if repo := self.git_repo: - try: - result = repo.git.ls_files( - "--cached", - "--others", - "--modified", - "--exclude-standard", - self._root, - ) - except Exception as e: - logger.warning( - "Failed to get non-ignored files from git: %s", str(e) - ) - else: - files = set() - for file in result.split(): - relative_path = os.path.relpath( - os.path.join(repo.working_dir, file), self._root - ) - if os.path.exists(relative_path): - files.add(relative_path) - - return sorted(files) - - return None - - def _get_extra_files(self) -> List[Tuple[str, str]]: - """Gets all extra files of the build context. - - Returns: - A tuple (path, file_content) for all extra files in the build - context. - """ - return list(self._extra_files.items()) - - def upload_code_if_necessary() -> str: """Upload code to the artifact store if necessary. @@ -831,15 +676,11 @@ def upload_code_if_necessary() -> str: Returns: The path where to archived code is uploaded. """ - upload_context = UploadContext(root=source_utils.get_source_root()) + code_archive = CodeArchive(root=source_utils.get_source_root()) artifact_store = Client().active_stack.artifact_store with tempfile.NamedTemporaryFile(mode="w+b", delete=True) as f: - # Don't use gzip as that includes the creation timestamp of the - # compressed tar file, which means the hash changes each time. This - # means currently the archive is not compressed, which should be - # changed. 
- upload_context.write_archive(f, gzip=False) + code_archive.write_archive(f) hash_ = hashlib.sha1() # nosec @@ -849,7 +690,7 @@ def upload_code_if_necessary() -> str: break hash_.update(data) - filename = f"{hash_.hexdigest()}.tar" + filename = f"{hash_.hexdigest()}.tar.gz" upload_dir = os.path.join(artifact_store.path, "code_uploads") fileio.makedirs(upload_dir) upload_path = os.path.join(upload_dir, filename) diff --git a/src/zenml/new/pipelines/code_archive.py b/src/zenml/new/pipelines/code_archive.py new file mode 100644 index 00000000000..bfbcd0195db --- /dev/null +++ b/src/zenml/new/pipelines/code_archive.py @@ -0,0 +1,121 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Code archive.""" + +import os +from typing import IO, TYPE_CHECKING, Dict, Optional + +from zenml.logger import get_logger +from zenml.utils import string_utils +from zenml.utils.archivable import Archivable + +if TYPE_CHECKING: + from git.repo.base import Repo + + +logger = get_logger(__name__) + + +class CodeArchive(Archivable): + """Code archive.""" + + def __init__(self, root: str) -> None: + """Initialize the object. + + Args: + *args: Unused args for subclasses. + **kwargs: Unused keyword args for subclasses. + """ + super().__init__() + self._root = root + + @property + def git_repo(self) -> Optional["Repo"]: + """Git repository active at the code archive root. + + Returns: + The git repository if available. 
+ """ + try: + # These imports fail when git is not installed on the machine + from git.exc import InvalidGitRepositoryError + from git.repo.base import Repo + except ImportError: + return None + + try: + git_repo = Repo(path=self._root, search_parent_directories=True) + except InvalidGitRepositoryError: + return None + + return git_repo + + def get_files(self) -> Dict[str, str]: + """Gets all regular files that should be included in the archive. + + Returns: + A dict {path_in_archive: path_on_filesystem} for all regular files + in the archive. + """ + if repo := self.git_repo: + try: + result = repo.git.ls_files( + "--cached", + "--others", + "--modified", + "--exclude-standard", + self._root, + ) + except Exception as e: + logger.warning( + "Failed to get non-ignored files from git: %s", str(e) + ) + else: + all_files = {} + for file in result.split(): + file_path = os.path.join(repo.working_dir, file) + path_in_archive = os.path.relpath(file_path, self._root) + if os.path.exists(file_path): + all_files[path_in_archive] = file_path + + return all_files + + all_files = {} + for root, _, files in os.walk(self._root): + relative_root = os.path.relpath(root, self._root) + for file in files: + file_path = os.path.join(root, file) + path_in_archive = os.path.join(relative_root, file) + all_files[path_in_archive] = file_path + + return all_files + + def write_archive( + self, output_file: IO[bytes], use_gzip: bool = True + ) -> None: + """Writes an archive of the build context to the given file. + + Args: + output_file: The file to write the archive to. + use_gzip: Whether to use `gzip` to compress the file. + """ + super().write_archive(output_file=output_file, use_gzip=use_gzip) + archive_size = os.path.getsize(output_file.name) + if archive_size > 20 * 1024 * 1024: + logger.warning( + "Code upload size: `%s`. 
If you believe this is " + "unreasonably large, make sure to version your code in git and " + "ignore unnecessary files using a `.gitignore` file.", + string_utils.get_human_readable_filesize(archive_size), + ) diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 455f0aa58d4..7621026750d 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -685,9 +685,6 @@ def _run( "`DockerSettings.prevent_build_reuse` instead." ) - prevent_build_reuse = ( - prevent_build_reuse or deployment.should_prevent_build_reuse - ) build_model = build_utils.reuse_or_create_pipeline_build( deployment=deployment, pipeline_id=pipeline_id, diff --git a/src/zenml/utils/archivable.py b/src/zenml/utils/archivable.py new file mode 100644 index 00000000000..adad057f9f8 --- /dev/null +++ b/src/zenml/utils/archivable.py @@ -0,0 +1,146 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Archivable mixin.""" + +import io +import tarfile +from abc import ABC, abstractmethod +from pathlib import Path +from typing import IO, Any, Dict + +from zenml.io import fileio + + +class Archivable(ABC): + """Archivable mixin class.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Initialize the object. + + Args: + *args: Unused args for subclasses. + **kwargs: Unused keyword args for subclasses. 
+ """ + self._extra_files: Dict[str, str] = {} + + def add_file(self, source: str, destination: str) -> None: + """Adds a file to the archive. + + Args: + source: The source of the file to add. This can either be a path + or the file content. + destination: The path inside the archive where the file + should be added. + """ + if fileio.exists(source): + with fileio.open(source) as f: + self._extra_files[destination] = f.read() + else: + self._extra_files[destination] = source + + def add_directory(self, source: str, destination: str) -> None: + """Adds a directory to the archive. + + Args: + source: Path to the directory. + destination: The path inside the build context where the directory + should be added. + + Raises: + ValueError: If `source` does not point to a directory. + """ + if not fileio.isdir(source): + raise ValueError( + f"Can't add directory {source} to the build context as it " + "does not exist or is not a directory." + ) + + for dir, _, files in fileio.walk(source): + dir_path = Path(fileio.convert_to_str(dir)) + for file_name in files: + file_name = fileio.convert_to_str(file_name) + file_source = dir_path / file_name + file_destination = ( + Path(destination) + / dir_path.relative_to(source) + / file_name + ) + + with file_source.open("r") as f: + self._extra_files[file_destination.as_posix()] = f.read() + + def write_archive( + self, output_file: IO[bytes], use_gzip: bool = True + ) -> None: + """Writes an archive of the build context to the given file. + + Args: + output_file: The file to write the archive to. + use_gzip: Whether to use `gzip` to compress the file. + """ + files = self.get_files() + extra_files = self.get_extra_files() + + if use_gzip: + from gzip import GzipFile + + # We don't use the builtin gzip functionality of the `tarfile` + # library as that one includes the tar filename and creation + # timestamp in the archive which causes the hash of the resulting + # file to be different each time. 
We use this hash to avoid
+            # duplicate uploads, which is why we pass emtpy values for filename
+            # and mtime here.
+            fileobj = GzipFile(
+                filename="", mode="wb", fileobj=output_file, mtime=0.0
+            )
+        else:
+            fileobj = output_file
+
+        with tarfile.open(mode="w", fileobj=fileobj) as tf:
+            for archive_path, file_path in files.items():
+                if archive_path in extra_files:
+                    continue
+
+                if info := tf.gettarinfo(file_path, arcname=archive_path):
+                    if info.isfile():
+                        with open(file_path, "rb") as f:
+                            tf.addfile(info, f)
+                    else:
+                        tf.addfile(info, None)
+
+            for archive_path, contents in extra_files.items():
+                info = tarfile.TarInfo(archive_path)
+                contents_encoded = contents.encode("utf-8")
+                info.size = len(contents_encoded)
+                tf.addfile(info, io.BytesIO(contents_encoded))
+
+        output_file.seek(0)
+
+    @abstractmethod
+    def get_files(self) -> Dict[str, str]:
+        """Gets all regular files that should be included in the archive.
+
+        Returns:
+            A dict {path_in_archive: path_on_filesystem} for all regular files
+            in the archive.
+        """
+
+    def get_extra_files(self) -> Dict[str, str]:
+        """Gets all extra files that should be included in the archive.
+
+        Returns:
+            A dict {path_in_archive: file_content} for all extra files in the
+            archive. 
+ """ + return self._extra_files.copy() From 39ca1f3104302262170f8ed82851312863f152c5 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 10:58:24 +0200 Subject: [PATCH 05/23] Improve build reuse --- src/zenml/config/build_configuration.py | 13 +++++++++---- src/zenml/config/docker_settings.py | 8 ++++---- src/zenml/new/pipelines/build_utils.py | 12 ++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/zenml/config/build_configuration.py b/src/zenml/config/build_configuration.py index b812312c27a..de4d48461e5 100644 --- a/src/zenml/config/build_configuration.py +++ b/src/zenml/config/build_configuration.py @@ -14,11 +14,13 @@ """Build configuration class.""" import hashlib +import json from typing import TYPE_CHECKING, Dict, Optional from pydantic import BaseModel from zenml.config.docker_settings import DockerSettings, SourceFileMode +from zenml.utils import json_utils if TYPE_CHECKING: from zenml.code_repositories import BaseCodeRepository @@ -60,11 +62,14 @@ def compute_settings_checksum( The checksum. 
""" hash_ = hashlib.md5() # nosec - hash_.update( - self.settings.model_dump_json( - exclude={"prevent_build_reuse"} - ).encode() + settings_json = json.dumps( + self.settings.model_dump( + mode="json", exclude={"prevent_build_reuse"} + ), + sort_keys=True, + default=json_utils.pydantic_encoder, ) + hash_.update(settings_json.encode()) if self.entrypoint: hash_.update(self.entrypoint.encode()) diff --git a/src/zenml/config/docker_settings.py b/src/zenml/config/docker_settings.py index 60dc1ae6e91..66caa454377 100644 --- a/src/zenml/config/docker_settings.py +++ b/src/zenml/config/docker_settings.py @@ -14,7 +14,7 @@ """Docker settings.""" from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field, model_validator from pydantic_settings import SettingsConfigDict @@ -57,11 +57,11 @@ class SourceFileMode(Enum): DOWNLOAD_FROM_ARTIFACT_STORE = "download_from_artifact_store" -DEFAULT_SOURCE_FILE_MODE = { +DEFAULT_SOURCE_FILE_MODE = [ SourceFileMode.INCLUDE, SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE, -} +] class PythonPackageInstaller(Enum): @@ -233,7 +233,7 @@ class DockerSettings(BaseSettings): user: Optional[str] = None build_config: Optional[DockerBuildConfig] = None - source_files: Set[SourceFileMode] = DEFAULT_SOURCE_FILE_MODE + source_files: List[SourceFileMode] = DEFAULT_SOURCE_FILE_MODE _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes( "copy_files", "copy_global_config" diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 226751e523a..c0210752cfc 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -85,17 +85,17 @@ def requires_included_code( If the deployment requires code included in the container images. 
""" for step in deployment.step_configurations.values(): - if step.config.docker_settings.source_files == { + if step.config.docker_settings.source_files == [ SourceFileMode.INCLUDE - }: + ]: return True if ( step.config.docker_settings.source_files - == { + == [ SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, SourceFileMode.INCLUDE, - } + ] and not code_repository ): return True @@ -116,9 +116,9 @@ def requires_download_from_code_repository( """ return any( step.config.docker_settings.source_files - == { + == [ SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, - } + ] for step in deployment.step_configurations.values() ) From 65940a41514badd6640a1d0f8b0abc0f6890e86b Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 11:15:16 +0200 Subject: [PATCH 06/23] Fix gzip for archives --- src/zenml/new/pipelines/build_utils.py | 4 +++- src/zenml/utils/archivable.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index c0210752cfc..7dcd243be44 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -679,7 +679,9 @@ def upload_code_if_necessary() -> str: code_archive = CodeArchive(root=source_utils.get_source_root()) artifact_store = Client().active_stack.artifact_store - with tempfile.NamedTemporaryFile(mode="w+b", delete=True) as f: + with tempfile.NamedTemporaryFile( + mode="w+b", delete=True, suffix=".tar.gz" + ) as f: code_archive.write_archive(f) hash_ = hashlib.sha1() # nosec diff --git a/src/zenml/utils/archivable.py b/src/zenml/utils/archivable.py index adad057f9f8..dedad681bbb 100644 --- a/src/zenml/utils/archivable.py +++ b/src/zenml/utils/archivable.py @@ -125,6 +125,9 @@ def write_archive( info.size = len(contents_encoded) tf.addfile(info, io.BytesIO(contents_encoded)) + if use_gzip: + fileobj.close() + output_file.seek(0) @abstractmethod From 0214859d438c54740e6cf2a85c5dd90702b41d46 Mon Sep 17 00:00:00 
2001 From: Michael Schuster Date: Wed, 31 Jul 2024 11:56:32 +0200 Subject: [PATCH 07/23] Better error messages --- src/zenml/new/pipelines/build_utils.py | 98 ++++++++++++++++++++------ 1 file changed, 77 insertions(+), 21 deletions(-) diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 7dcd243be44..88750166bf4 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -123,6 +123,39 @@ def requires_download_from_code_repository( ) +def code_download_possible( + deployment: "PipelineDeploymentBase", + code_repository: Optional["BaseCodeRepository"] = None, +) -> bool: + """Checks whether code download is possible for the deployment. + + Args: + deployment: The deployment. + code_repository: If provided, this code repository can be used to + download the code inside the container images. + + Returns: + Whether code download is possible for the deployment. + """ + for step in deployment.step_configurations.values(): + if ( + SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE + in step.config.docker_settings.source_files + ): + continue + + if ( + SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY + in step.config.docker_settings.source_files + and code_repository + ): + continue + + return False + + return True + + def reuse_or_create_pipeline_build( deployment: "PipelineDeploymentBase", allow_build_reuse: bool, @@ -455,26 +488,29 @@ def verify_local_repository_context( if not local_repo_context: raise RuntimeError( "The `DockerSettings` of the pipeline or one of its " - "steps specify that code should be included in the " - "Docker image (`source_files='download'`), but there is no " - "code repository active at your current source root " - f"`{source_utils.get_source_root()}`." 
+ "steps specify that code should be downloaded from a " + "code repository " + "(`source_files=['download_from_code_repository']`), but " + "there is no code repository active at your current source " + f"root `{source_utils.get_source_root()}`." ) elif local_repo_context.is_dirty: raise RuntimeError( "The `DockerSettings` of the pipeline or one of its " - "steps specify that code should be included in the " - "Docker image (`source_files='download'`), but the code " - "repository active at your current source root " + "steps specify that code should be downloaded from a " + "code repository " + "(`source_files=['download_from_code_repository']`), but " + "the code repository active at your current source root " f"`{source_utils.get_source_root()}` has uncommitted " "changes." ) elif local_repo_context.has_local_changes: raise RuntimeError( "The `DockerSettings` of the pipeline or one of its " - "steps specify that code should be included in the " - "Docker image (`source_files='download'`), but the code " - "repository active at your current source root " + "steps specify that code should be downloaded from a " + "code repository " + "(`source_files=['download_from_code_repository']`), but " + "the code repository active at your current source root " f"`{source_utils.get_source_root()}` has unpushed " "changes." ) @@ -482,13 +518,13 @@ def verify_local_repository_context( if local_repo_context: if local_repo_context.is_dirty: logger.warning( - "Unable to use code repository to download code for this run " - "as there are uncommitted changes." + "Unable to use code repository to download code for this " + "run as there are uncommitted changes." ) elif local_repo_context.has_local_changes: logger.warning( - "Unable to use code repository to download code for this run " - "as there are unpushed changes." + "Unable to use code repository to download code for this " + "run as there are unpushed changes." 
) code_repository = None @@ -537,13 +573,33 @@ def verify_custom_build( "might differ from the local code in your client environment." ) - if build.requires_code_download and not code_repository: - raise RuntimeError( - "The build you specified does not include code but code download " - "not possible. This might be because you don't have a code " - "repository registered or the code repository contains local " - "changes." - ) + if build.requires_code_download: + if requires_included_code( + deployment=deployment, code_repository=code_repository + ): + raise RuntimeError( + "The `DockerSettings` of the pipeline or one of its " + "steps specify that code should be included in the Docker " + "image (`source_files=['include']`), but the build you " + "specified requires code download. Either update your " + "`DockerSettings` or specify a different build and try " + "again." + ) + + if not code_download_possible( + deployment=deployment, code_repository=code_repository + ): + # The case that download from a code repo is required but not + # possible is already handled in `verify_local_repository_context`. + # This means that some step does not allow code download from the + # artifact store. + raise RuntimeError( + "The `DockerSettings` of the pipeline or one of its " + "steps specify that code can not be downloaded from the " + "artifact store, but the build you specified requires code " + "download. Either update your `DockerSettings` or specify a " + "different build and try again." 
+ ) if build.checksum: build_checksum = compute_build_checksum( From dc0f0b52e47589f00f7f53c0c092e12c09761fee Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 12:04:57 +0200 Subject: [PATCH 08/23] Docstrings/mypy --- src/zenml/new/pipelines/code_archive.py | 3 +-- src/zenml/utils/archivable.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/zenml/new/pipelines/code_archive.py b/src/zenml/new/pipelines/code_archive.py index bfbcd0195db..022a8c59fd0 100644 --- a/src/zenml/new/pipelines/code_archive.py +++ b/src/zenml/new/pipelines/code_archive.py @@ -34,8 +34,7 @@ def __init__(self, root: str) -> None: """Initialize the object. Args: - *args: Unused args for subclasses. - **kwargs: Unused keyword args for subclasses. + root: Root directory of the archive. """ super().__init__() self._root = root diff --git a/src/zenml/utils/archivable.py b/src/zenml/utils/archivable.py index dedad681bbb..1cb1496a393 100644 --- a/src/zenml/utils/archivable.py +++ b/src/zenml/utils/archivable.py @@ -101,7 +101,7 @@ def write_archive( # file to be different each time. We use this hash to avoid # duplicate uploads, which is why we pass emtpy values for filename # and mtime here. 
- fileobj = GzipFile( + fileobj: Any = GzipFile( filename="", mode="wb", fileobj=output_file, mtime=0.0 ) else: @@ -119,7 +119,7 @@ def write_archive( else: tf.addfile(info, None) - for archive_path, contents in extra_files: + for archive_path, contents in extra_files.items(): info = tarfile.TarInfo(archive_path) contents_encoded = contents.encode("utf-8") info.size = len(contents_encoded) From 6a70b0720068b48c29f57ad04047b2e717f3f31e Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 12:14:28 +0200 Subject: [PATCH 09/23] Remove some unnecessary stuff --- src/zenml/enums.py | 11 --- src/zenml/models/v2/misc/build_item.py | 29 +------- src/zenml/new/pipelines/build_utils.py | 96 -------------------------- 3 files changed, 2 insertions(+), 134 deletions(-) diff --git a/src/zenml/enums.py b/src/zenml/enums.py index 40b2ddaa562..2df9e6af793 100644 --- a/src/zenml/enums.py +++ b/src/zenml/enums.py @@ -410,14 +410,3 @@ class StackDeploymentProvider(StrEnum): AWS = "aws" GCP = "gcp" AZURE = "azure" - - -class RequirementType(StrEnum): - """All requirement types.""" - - UNKNOWN = "unknown" - STACK = "stack" - USER = "user" - INTEGRATION = "integration" - LOCAL_ENVIRONMENT = "local_environment" - CODE_REPOSITORY = "code_repository" diff --git a/src/zenml/models/v2/misc/build_item.py b/src/zenml/models/v2/misc/build_item.py index 1c2ae44452e..3d0ae05673f 100644 --- a/src/zenml/models/v2/misc/build_item.py +++ b/src/zenml/models/v2/misc/build_item.py @@ -13,12 +13,9 @@ # permissions and limitations under the License. 
"""Model definition for pipeline build item.""" -import itertools -from typing import Dict, List, Optional +from typing import Optional -from pydantic import BaseModel, Field, model_validator - -from zenml.enums import RequirementType +from pydantic import BaseModel, Field class BuildItem(BaseModel): @@ -43,9 +40,6 @@ class BuildItem(BaseModel): requirements: Optional[str] = Field( default=None, title="The pip requirements installed in the image." ) - pypi_requirements: Dict[RequirementType, List[str]] = {} - apt_requirements: Dict[RequirementType, List[str]] = {} - settings_checksum: Optional[str] = Field( default=None, title="The checksum of the build settings." ) @@ -55,22 +49,3 @@ class BuildItem(BaseModel): requires_code_download: bool = Field( default=False, title="Whether the image needs to download files." ) - - @model_validator(mode="after") - def _migrate_requirements(self) -> "BuildItem": - """Migrate PyPI requirements. - - Returns: - The build item with migrated requirements. 
- """ - if not self.pypi_requirements: - if self.requirements: - self.pypi_requirements = { - RequirementType.UNKNOWN: self.requirements.splitlines() - } - elif not self.requirements: - self.requirements = "\n".join( - itertools.chain.from_iterable(self.pypi_requirements.values()) - ) - - return self diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 88750166bf4..583c7f6d74a 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -763,99 +763,3 @@ def upload_code_if_necessary() -> str: ) return upload_path - - -# import os -# import subprocess -# from typing import Tuple - -# from zenml.code_repositories import BaseCodeRepository -# from zenml.config.docker_settings import ( -# DockerSettings, -# PythonEnvironmentExportMethod, -# ) -# from zenml.enums import OperatingSystemType, RequirementType -# from zenml.integrations.registry import integration_registry -# from zenml.stack import Stack -# from zenml.utils import io_utils - - -# def extract_requirements( -# docker_settings: DockerSettings, -# stack: "Stack", -# code_repository: Optional["BaseCodeRepository"] = None, -# ) -> Tuple[Dict[RequirementType, List[str]], Dict[RequirementType, List[str]]]: -# pypi_requirements = {} -# apt_requirements = {} - -# if docker_settings.install_stack_requirements: -# if stack_pypi_requirements := stack.requirements(): -# pypi_requirements[RequirementType.STACK] = sorted( -# stack_pypi_requirements -# ) - -# if stack_apt_requirements := stack.apt_packages: -# apt_requirements[RequirementType.STACK] = stack_apt_requirements - -# if code_repository: -# pypi_requirements[RequirementType.CODE_REPOSITORY] = sorted( -# code_repository.requirements -# ) - -# if docker_settings.replicate_local_python_environment: -# if isinstance( -# docker_settings.replicate_local_python_environment, -# PythonEnvironmentExportMethod, -# ): -# command = ( -# 
docker_settings.replicate_local_python_environment.command -# ) -# else: -# command = " ".join( -# docker_settings.replicate_local_python_environment -# ) - -# try: -# local_requirements = subprocess.check_output( -# command, -# shell=True, # nosec -# ).decode() -# except subprocess.CalledProcessError as e: -# raise RuntimeError( -# "Unable to export local python packages." -# ) from e - -# pypi_requirements[RequirementType.LOCAL_ENVIRONMENT] = ( -# local_requirements.splitlines() -# ) - -# if docker_settings.required_integrations: -# for integration_name in docker_settings.required_integrations: -# integration = integration_registry.integrations[integration_name] -# pypi_requirements[RequirementType.INTEGRATION] = ( -# integration.get_requirements( -# target_os=OperatingSystemType.LINUX -# ) -# ) -# apt_requirements[RequirementType.INTEGRATION] = ( -# integration.APT_PACKAGES -# ) - -# if isinstance(docker_settings.requirements, str): -# path = os.path.abspath(docker_settings.requirements) -# try: -# user_requirements = io_utils.read_file_contents_as_string(path) -# except FileNotFoundError as e: -# raise FileNotFoundError( -# f"Requirements file {path} does not exist." 
-# ) from e - -# pypi_requirements[RequirementType.USER] = ( -# user_requirements.splitlines() -# ) -# elif isinstance(docker_settings.requirements, List): -# pypi_requirements[RequirementType.USER] = docker_settings.requirements - -# apt_requirements[RequirementType.USER] = docker_settings.apt_packages - -# return pypi_requirements, apt_requirements From 8ae50bf48a229e7eae99d5237dcb3771f016c9b8 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 12:17:17 +0200 Subject: [PATCH 10/23] Typo --- src/zenml/models/v2/misc/build_item.py | 2 -- src/zenml/utils/archivable.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/zenml/models/v2/misc/build_item.py b/src/zenml/models/v2/misc/build_item.py index 3d0ae05673f..13d35ddefd9 100644 --- a/src/zenml/models/v2/misc/build_item.py +++ b/src/zenml/models/v2/misc/build_item.py @@ -26,8 +26,6 @@ class BuildItem(BaseModel): dockerfile: The contents of the Dockerfile used to build the image. requirements: The pip requirements installed in the image. This is a string consisting of multiple concatenated requirements.txt files. - pypi_requirements: PyPI requirements included in the image. - apt_requirements: Apt requirements included in the image. settings_checksum: Checksum of the settings used for the build. contains_code: Whether the image contains user files. requires_code_download: Whether the image needs to download files. diff --git a/src/zenml/utils/archivable.py b/src/zenml/utils/archivable.py index 1cb1496a393..c2d7b83c422 100644 --- a/src/zenml/utils/archivable.py +++ b/src/zenml/utils/archivable.py @@ -99,7 +99,7 @@ def write_archive( # library as that one includes the tar filename and creation # timestamp in the archive which causes the hash of the resulting # file to be different each time. We use this hash to avoid - # duplicate uploads, which is why we pass emtpy values for filename + # duplicate uploads, which is why we pass empty values for filename # and mtime here. 
fileobj: Any = GzipFile( filename="", mode="wb", fileobj=output_file, mtime=0.0 From 961a043bfb99db3d72a4076443eeb8bfc29527b5 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 14:16:08 +0200 Subject: [PATCH 11/23] Update build context to inherit from new superclass --- src/zenml/image_builders/build_context.py | 95 ++++++----------------- 1 file changed, 24 insertions(+), 71 deletions(-) diff --git a/src/zenml/image_builders/build_context.py b/src/zenml/image_builders/build_context.py index 6502784d717..6c0146d8a60 100644 --- a/src/zenml/image_builders/build_context.py +++ b/src/zenml/image_builders/build_context.py @@ -14,18 +14,18 @@ """Image build context.""" import os -from pathlib import Path -from typing import IO, Dict, List, Optional, Set, Tuple, cast +from typing import IO, Dict, List, Optional, Set, cast from zenml.constants import REPOSITORY_DIRECTORY_NAME from zenml.io import fileio from zenml.logger import get_logger from zenml.utils import io_utils, string_utils +from zenml.utils.archivable import Archivable logger = get_logger(__name__) -class BuildContext: +class BuildContext(Archivable): """Image build context. This class is responsible for creating an archive of the files needed to @@ -68,70 +68,26 @@ def dockerignore_file(self) -> Optional[str]: return None - def add_file(self, source: str, destination: str) -> None: - """Adds a file to the build context. - - Args: - source: The source of the file to add. This can either be a path - or the file content. - destination: The path inside the build context where the file - should be added. - """ - if fileio.exists(source): - with fileio.open(source) as f: - self._extra_files[destination] = f.read() - else: - self._extra_files[destination] = source - - def add_directory(self, source: str, destination: str) -> None: - """Adds a directory to the build context. - - Args: - source: Path to the directory. 
- destination: The path inside the build context where the directory - should be added. - - Raises: - ValueError: If `source` does not point to a directory. - """ - if not fileio.isdir(source): - raise ValueError( - f"Can't add directory {source} to the build context as it " - "does not exist or is not a directory." - ) - - for dir, _, files in fileio.walk(source): - dir_path = Path(fileio.convert_to_str(dir)) - for file_name in files: - file_name = fileio.convert_to_str(file_name) - file_source = dir_path / file_name - file_destination = ( - Path(destination) - / dir_path.relative_to(source) - / file_name - ) - - with file_source.open("r") as f: - self._extra_files[file_destination.as_posix()] = f.read() - - def write_archive(self, output_file: IO[bytes], gzip: bool = True) -> None: + def write_archive( + self, output_file: IO[bytes], use_gzip: bool = True + ) -> None: """Writes an archive of the build context to the given file. Args: output_file: The file to write the archive to. - gzip: Whether to use `gzip` to compress the file. + use_gzip: Whether to use `gzip` to compress the file. """ from docker.utils import build as docker_build_utils - files = self._get_files() - extra_files = self._get_extra_files() + files = self.get_files() + extra_files = self.get_extra_files() context_archive = docker_build_utils.create_archive( fileobj=output_file, root=self._root, - files=sorted(files), - gzip=gzip, - extra_files=extra_files, + files=sorted(files.keys()), + gzip=use_gzip, + extra_files=list(extra_files.items()), ) build_context_size = os.path.getsize(context_archive.name) @@ -151,33 +107,30 @@ def write_archive(self, output_file: IO[bytes], gzip: bool = True) -> None: os.path.join(self._root, ".dockerignore"), ) - def _get_files(self) -> Set[str]: - """Gets all non-ignored files in the build context root directory. + def get_files(self) -> Dict[str, str]: + """Gets all regular files that should be included in the archive. Returns: - All build context files. 
+ A dict {path_in_archive: path_on_filesystem} for all regular files + in the archive. """ if self._root: - exclude_patterns = self._get_exclude_patterns() from docker.utils import build as docker_build_utils - return cast( + exclude_patterns = self._get_exclude_patterns() + + archive_paths = cast( Set[str], docker_build_utils.exclude_paths( self._root, patterns=exclude_patterns ), ) + return { + archive_path: os.path.join(self._root, archive_path) + for archive_path in archive_paths + } else: - return set() - - def _get_extra_files(self) -> List[Tuple[str, str]]: - """Gets all extra files of the build context. - - Returns: - A tuple (path, file_content) for all extra files in the build - context. - """ - return list(self._extra_files.items()) + return {} def _get_exclude_patterns(self) -> List[str]: """Gets all exclude patterns from the dockerignore file. From b3f9409b99d2efd3fda369cf2f9a06af69e928e3 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 15:07:54 +0200 Subject: [PATCH 12/23] Fix unit tests --- .../image_builders/base_image_builder.py | 2 +- .../image_builders/kaniko_image_builder.py | 2 +- src/zenml/new/pipelines/build_utils.py | 17 +++++++++--- .../unit/image_builders/test_build_context.py | 27 ++++++++++--------- tests/unit/pipelines/test_build_utils.py | 23 +++++++--------- 5 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/zenml/image_builders/base_image_builder.py b/src/zenml/image_builders/base_image_builder.py index 955275cc9a9..b99bb277ec8 100644 --- a/src/zenml/image_builders/base_image_builder.py +++ b/src/zenml/image_builders/base_image_builder.py @@ -119,7 +119,7 @@ def _upload_build_context( hash_ = hashlib.sha1() # nosec with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as f: - build_context.write_archive(f, gzip=True) + build_context.write_archive(f, use_gzip=True) while True: data = f.read(64 * 1024) diff --git a/src/zenml/integrations/kaniko/image_builders/kaniko_image_builder.py 
b/src/zenml/integrations/kaniko/image_builders/kaniko_image_builder.py index ebb3f09fefa..1a4aaac3ad8 100644 --- a/src/zenml/integrations/kaniko/image_builders/kaniko_image_builder.py +++ b/src/zenml/integrations/kaniko/image_builders/kaniko_image_builder.py @@ -295,7 +295,7 @@ def _write_build_context( logger.debug("Writing build context to process stdin.") assert process.stdin with process.stdin as _, tempfile.TemporaryFile(mode="w+b") as f: - build_context.write_archive(f, gzip=True) + build_context.write_archive(f, use_gzip=True) while True: data = f.read(1024) if not data: diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 583c7f6d74a..eac0d4974fe 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -586,13 +586,22 @@ def verify_custom_build( "again." ) + if ( + requires_download_from_code_repository(deployment=deployment) + and not code_repository + ): + raise RuntimeError( + "The `DockerSettings` of the pipeline or one of its " + "steps specify that code should be downloaded from a " + "code repository " + "(`source_files=['download_from_code_repository']`), but " + "there is no code repository active at your current source " + f"root `{source_utils.get_source_root()}`." + ) + if not code_download_possible( deployment=deployment, code_repository=code_repository ): - # The case that download from a code repo is required but not - # possible is already handled in `verify_local_repository_context`. - # This means that some step does not allow code download from the - # artifact store. 
raise RuntimeError( "The `DockerSettings` of the pipeline or one of its " "steps specify that code can not be downloaded from the " diff --git a/tests/unit/image_builders/test_build_context.py b/tests/unit/image_builders/test_build_context.py index 3f90770191a..cb70d8ce793 100644 --- a/tests/unit/image_builders/test_build_context.py +++ b/tests/unit/image_builders/test_build_context.py @@ -27,9 +27,9 @@ def test_adding_extra_files(tmp_path): build_context.add_file(str(extra_file_path), destination="indirect") - extra_files = build_context._get_extra_files() - assert extra_files[0] == ("direct", "file content as string") - assert extra_files[1] == ("indirect", "file content in file") + extra_files = build_context.get_extra_files() + assert extra_files["direct"] == "file content as string" + assert extra_files["indirect"] == "file content in file" def test_adding_extra_directory(tmp_path): @@ -40,9 +40,9 @@ def test_adding_extra_directory(tmp_path): build_context = BuildContext() build_context.add_directory(str(tmp_path), destination="dir") - extra_files = build_context._get_extra_files() - assert ("dir/1", "file 1") in extra_files - assert ("dir/2", "file 2") in extra_files + extra_files = build_context.get_extra_files() + assert extra_files["dir/1"] == "file 1" + assert extra_files["dir/2"] == "file 2" def test_build_context_includes_and_excludes(tmp_path): @@ -55,7 +55,10 @@ def test_build_context_includes_and_excludes(tmp_path): build_context = BuildContext(root=str(root)) assert build_context.dockerignore_file is None assert build_context._get_exclude_patterns() == [] - assert build_context._get_files() == {"1", "2"} + assert build_context.get_files() == { + "1": str(root / "1"), + "2": str(root / "2"), + } custom_dockerignore = tmp_path / "custom_dockerignore" custom_dockerignore.write_text("/1") @@ -64,7 +67,7 @@ def test_build_context_includes_and_excludes(tmp_path): ) build_context.dockerignore_file == str(custom_dockerignore) assert 
build_context._get_exclude_patterns() == ["/1", "!/.zen"] - assert build_context._get_files() == {"2"} + assert build_context.get_files() == {"2": str(root / "2")} zen_repo = root / ".zen" / "config.yaml" zen_repo.parent.mkdir() @@ -74,8 +77,8 @@ def test_build_context_includes_and_excludes(tmp_path): build_context = BuildContext(root=str(root)) build_context.dockerignore_file == str(default_dockerignore) assert build_context._get_exclude_patterns() == ["*", "!/.zen"] - assert build_context._get_files() == { - ".dockerignore", - ".zen", - os.path.join(".zen", "config.yaml"), + assert build_context.get_files() == { + ".dockerignore": str(default_dockerignore), + ".zen": str(root / ".zen"), + os.path.join(".zen", "config.yaml"): str(zen_repo), } diff --git a/tests/unit/pipelines/test_build_utils.py b/tests/unit/pipelines/test_build_utils.py index d86f4cbbb2e..f8e7b785c68 100644 --- a/tests/unit/pipelines/test_build_utils.py +++ b/tests/unit/pipelines/test_build_utils.py @@ -158,7 +158,9 @@ def test_build_uses_correct_settings(mocker, empty_pipeline): # noqa: F811 """Tests that the build settings and pipeline ID get correctly forwarded.""" build_config = BuildConfiguration( key="key", - settings=DockerSettings(), + settings=DockerSettings( + source_files=["include", "download_from_code_repository"] + ), step_name="step_name", entrypoint="entrypoint", extra_files={"key": "value"}, @@ -366,11 +368,8 @@ def test_custom_build_verification( } ) ) - - mocker.patch.object( - PipelineDeploymentBase, - "requires_code_download", - new_callable=mocker.PropertyMock, + mocker.patch( + "zenml.new.pipelines.build_utils.requires_download_from_code_repository", return_value=True, ) @@ -436,10 +435,8 @@ def test_local_repo_verification( client_version=sample_deployment_response_model.client_version, server_version=sample_deployment_response_model.server_version, ) - mocker.patch.object( - PipelineDeploymentBase, - "requires_code_download", - new_callable=mocker.PropertyMock, + 
mocker.patch( + "zenml.new.pipelines.build_utils.requires_download_from_code_repository", return_value=False, ) @@ -456,10 +453,8 @@ def test_local_repo_verification( local_repo_context=context_with_local_changes, ) - mocker.patch.object( - PipelineDeploymentBase, - "requires_code_download", - new_callable=mocker.PropertyMock, + mocker.patch( + "zenml.new.pipelines.build_utils.requires_download_from_code_repository", return_value=True, ) mocker.patch.object(Stack, "get_docker_builds", return_value=[]) From 64a49cbdedad0a0c30298e553f138bcbe0bbf3bf Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 16:10:45 +0200 Subject: [PATCH 13/23] Small fixes --- .../base_entrypoint_configuration.py | 4 +++- src/zenml/new/pipelines/build_utils.py | 17 +++++++++++------ src/zenml/new/pipelines/code_archive.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/zenml/entrypoints/base_entrypoint_configuration.py b/src/zenml/entrypoints/base_entrypoint_configuration.py index 753e6076fbe..cb24d646fdb 100644 --- a/src/zenml/entrypoints/base_entrypoint_configuration.py +++ b/src/zenml/entrypoints/base_entrypoint_configuration.py @@ -252,8 +252,9 @@ def download_code_from_code_repository( code_repository_utils.set_custom_local_repository( root=code_repo_root, commit=code_reference.commit, repo=repo ) - # Add downloaded file directory to python path + sys.path.insert(0, download_dir) + os.chdir(download_dir) def download_code_from_artifact_store(self, code_path: str) -> None: """Download code from the artifact store. 
@@ -287,6 +288,7 @@ def download_code_from_artifact_store(self, code_path: str) -> None: source_utils.set_custom_source_root(extract_dir) sys.path.insert(0, extract_dir) + os.chdir(extract_dir) @abstractmethod def run(self) -> None: diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index eac0d4974fe..100db2dc25a 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -43,9 +43,7 @@ ) from zenml.new.pipelines.code_archive import CodeArchive from zenml.stack import Stack -from zenml.utils import ( - source_utils, -) +from zenml.utils import source_utils, string_utils from zenml.utils.pipeline_docker_image_builder import ( PipelineDockerImageBuilder, ) @@ -744,6 +742,8 @@ def upload_code_if_necessary() -> str: code_archive = CodeArchive(root=source_utils.get_source_root()) artifact_store = Client().active_stack.artifact_store + logger.info("Archiving code...") + with tempfile.NamedTemporaryFile( mode="w+b", delete=True, suffix=".tar.gz" ) as f: @@ -763,12 +763,17 @@ def upload_code_if_necessary() -> str: upload_path = os.path.join(upload_dir, filename) if not fileio.exists(upload_path): - logger.info("Uploading code to `%s`.", upload_path) + archive_size = string_utils.get_human_readable_filesize( + os.path.getsize(f.name) + ) + logger.info( + "Uploading code to `%s` (Size: %s).", upload_path, archive_size + ) fileio.copy(f.name, upload_path) logger.info("Code upload finished.") else: - logger.debug( - "Code already exists in artifact store, not uploading." + logger.info( + "Code already exists in artifact store, skipping upload." 
) return upload_path diff --git a/src/zenml/new/pipelines/code_archive.py b/src/zenml/new/pipelines/code_archive.py index 022a8c59fd0..595b83b9af4 100644 --- a/src/zenml/new/pipelines/code_archive.py +++ b/src/zenml/new/pipelines/code_archive.py @@ -113,7 +113,7 @@ def write_archive( archive_size = os.path.getsize(output_file.name) if archive_size > 20 * 1024 * 1024: logger.warning( - "Code upload size: `%s`. If you believe this is " + "Code archive size: `%s`. If you believe this is " "unreasonably large, make sure to version your code in git and " "ignore unnecessary files using a `.gitignore` file.", string_utils.get_human_readable_filesize(archive_size), From a4f8d6c01ca313243ce141d18088b906f577812b Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 31 Jul 2024 17:42:27 +0200 Subject: [PATCH 14/23] Ignore .zen folder and other small improvements --- src/zenml/new/pipelines/code_archive.py | 52 ++++++++++++++++++++----- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/src/zenml/new/pipelines/code_archive.py b/src/zenml/new/pipelines/code_archive.py index 595b83b9af4..fa68f49b939 100644 --- a/src/zenml/new/pipelines/code_archive.py +++ b/src/zenml/new/pipelines/code_archive.py @@ -14,6 +14,7 @@ """Code archive.""" import os +from pathlib import Path from typing import IO, TYPE_CHECKING, Dict, Optional from zenml.logger import get_logger @@ -60,13 +61,33 @@ def git_repo(self) -> Optional["Repo"]: return git_repo + def _get_all_files(self) -> Dict[str, str]: + """Get all files inside the archive root. + + Returns: + All files inside the archive root. + """ + all_files = {} + for root, _, files in os.walk(self._root): + for file in files: + file_path = os.path.join(root, file) + path_in_archive = os.path.relpath(file_path, self._root) + all_files[path_in_archive] = file_path + + return all_files + def get_files(self) -> Dict[str, str]: """Gets all regular files that should be included in the archive. 
+ Raises: + RuntimeError: If the code archive would not include any files. + Returns: A dict {path_in_archive: path_on_filesystem} for all regular files in the archive. """ + all_files = {} + if repo := self.git_repo: try: result = repo.git.ls_files( @@ -80,23 +101,34 @@ def get_files(self) -> Dict[str, str]: logger.warning( "Failed to get non-ignored files from git: %s", str(e) ) + all_files = self._get_all_files() else: - all_files = {} for file in result.split(): file_path = os.path.join(repo.working_dir, file) path_in_archive = os.path.relpath(file_path, self._root) + if os.path.exists(file_path): all_files[path_in_archive] = file_path + else: + all_files = self._get_all_files() + + if not all_files: + raise RuntimeError( + "The code archive to be uploaded does not contain any files. " + "This is probably because all files in your source root " + f"`{self._root}` are ignored by a .gitignore file." + ) - return all_files - - all_files = {} - for root, _, files in os.walk(self._root): - relative_root = os.path.relpath(root, self._root) - for file in files: - file_path = os.path.join(root, file) - path_in_archive = os.path.join(relative_root, file) - all_files[path_in_archive] = file_path + # Explicitly remove .zen directories as we write an updated version + # to disk everytime ZenML is called. This updates the mtime of the + # file, which invalidates the code upload caching. The values in + # the .zen directory are not needed anyway as we set them as + # environment variables. 
+ all_files = { + path_in_archive: file_path + for path_in_archive, file_path in sorted(all_files.items()) + if ".zen" not in Path(path_in_archive).parts[:-1] + } return all_files From 190159d358940cc887ca6dcfc718624b7fb4fe94 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 1 Aug 2024 15:48:14 +0200 Subject: [PATCH 15/23] Sort and remove duplicates for better build reuse --- src/zenml/config/docker_settings.py | 60 +++++++++++++++++------------ 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/zenml/config/docker_settings.py b/src/zenml/config/docker_settings.py index 66caa454377..07338e82f92 100644 --- a/src/zenml/config/docker_settings.py +++ b/src/zenml/config/docker_settings.py @@ -16,13 +16,13 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from pydantic_settings import SettingsConfigDict from zenml.config.base_settings import BaseSettings +from zenml.enums import StrEnum from zenml.logger import get_logger from zenml.utils import deprecation_utils -from zenml.utils.pydantic_utils import before_validator_handler logger = get_logger(__name__) @@ -49,7 +49,7 @@ def command(self) -> str: }[self] -class SourceFileMode(Enum): +class SourceFileMode(StrEnum): """Different methods to handle source files in Docker images.""" INCLUDE = "include" @@ -239,43 +239,53 @@ class DockerSettings(BaseSettings): "copy_files", "copy_global_config" ) - @model_validator(mode="before") + @field_validator("source_files", mode="before") @classmethod - @before_validator_handler - def _migrate_source_files(cls, data: Dict[str, Any]) -> Dict[str, Any]: - """Migrates the value from the old source_files attributes. + def _migrate_source_files(cls, value: Any) -> Any: + """Migrate old source_files values. Args: - data: The settings values. + value: The attribute value. 
Raises: ValueError: If an invalid source file mode is specified. Returns: - The migrated settings values. + The migrated value. """ - source_files = data.get("source_files", None) - - if isinstance(source_files, str): - if source_files == "download": - new_source_files = { + if isinstance(value, str): + if value == "download": + new_source_files = [ SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY - } - elif source_files == "download_or_include": - new_source_files = { + ] + elif value == "download_or_include": + new_source_files = [ SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, SourceFileMode.INCLUDE, - } - elif source_files == "ignore": - new_source_files = set() - elif source_files == "include": - new_source_files = {SourceFileMode.INCLUDE} + ] + elif value == "ignore": + new_source_files = [] + elif value == "include": + new_source_files = [SourceFileMode.INCLUDE] else: - raise ValueError(f"Invalid source file mode `{source_files}`.") + raise ValueError(f"Invalid source file mode `{value}`.") + + return new_source_files + + return value + + @field_validator("source_files", mode="after") + @classmethod + def _sort_source_files(cls, value: List[str]) -> List[str]: + """Sort the source files list. - data["source_files"] = new_source_files + Args: + value: The attribute value. - return data + Returns: + The sorted value with duplicates removed. 
+ """ + return sorted(set(value)) @model_validator(mode="after") def _validate_skip_build(self) -> "DockerSettings": From 69bc05900474552a9fefb3680e34fc9656ab3485 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 1 Aug 2024 17:02:16 +0200 Subject: [PATCH 16/23] Update docker settings to use booleans --- src/zenml/config/build_configuration.py | 15 +-- src/zenml/config/docker_settings.py | 159 +++++++++++------------ src/zenml/new/pipelines/build_utils.py | 65 +++++---- tests/unit/pipelines/test_build_utils.py | 5 +- 4 files changed, 115 insertions(+), 129 deletions(-) diff --git a/src/zenml/config/build_configuration.py b/src/zenml/config/build_configuration.py index de4d48461e5..120e7b5e325 100644 --- a/src/zenml/config/build_configuration.py +++ b/src/zenml/config/build_configuration.py @@ -19,7 +19,7 @@ from pydantic import BaseModel -from zenml.config.docker_settings import DockerSettings, SourceFileMode +from zenml.config.docker_settings import DockerSettings from zenml.utils import json_utils if TYPE_CHECKING: @@ -113,10 +113,7 @@ def should_include_files( if self.should_download_files(code_repository=code_repository): return False - if SourceFileMode.INCLUDE in self.settings.source_files: - return True - - return False + return self.settings.allow_including_files_in_images def should_download_files( self, @@ -136,10 +133,7 @@ def should_download_files( ): return True - if ( - SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE - in self.settings.source_files - ): + if self.settings.allow_download_from_artifact_store: return True return False @@ -159,8 +153,7 @@ def should_download_files_from_code_repository( """ if ( code_repository - and SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY - in self.settings.source_files + and self.settings.allow_download_from_code_repository ): return True diff --git a/src/zenml/config/docker_settings.py b/src/zenml/config/docker_settings.py index 07338e82f92..62c5b60d54d 100644 --- a/src/zenml/config/docker_settings.py +++ 
b/src/zenml/config/docker_settings.py @@ -16,13 +16,12 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, Field, field_validator, model_validator -from pydantic_settings import SettingsConfigDict +from pydantic import BaseModel, ConfigDict, Field, model_validator from zenml.config.base_settings import BaseSettings -from zenml.enums import StrEnum from zenml.logger import get_logger from zenml.utils import deprecation_utils +from zenml.utils.pydantic_utils import before_validator_handler logger = get_logger(__name__) @@ -49,21 +48,6 @@ def command(self) -> str: }[self] -class SourceFileMode(StrEnum): - """Different methods to handle source files in Docker images.""" - - INCLUDE = "include" - DOWNLOAD_FROM_CODE_REPOSITORY = "download_from_code_repository" - DOWNLOAD_FROM_ARTIFACT_STORE = "download_from_artifact_store" - - -DEFAULT_SOURCE_FILE_MODE = [ - SourceFileMode.INCLUDE, - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, - SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE, -] - - class PythonPackageInstaller(Enum): """Different installers for python packages.""" @@ -141,8 +125,6 @@ class DockerSettings(BaseSettings): when the `dockerfile` attribute is set. If this is left empty, the build context will only contain the Dockerfile. parent_image_build_config: Configuration for the parent image build. - build_options: DEPRECATED, use parent_image_build_config.build_options - instead. skip_build: If set to `True`, the parent image will be used directly to run the steps of your pipeline. prevent_build_reuse: Prevent the reuse of an existing build. @@ -183,31 +165,29 @@ class DockerSettings(BaseSettings): environment: Dictionary of environment variables to set inside the Docker image. build_config: Configuration for the main image build. - dockerignore: DEPRECATED, use build_config.dockerignore instead. - copy_files: DEPRECATED/UNUSED. - copy_global_config: DEPRECATED/UNUSED. 
user: If not `None`, will set the user, make it owner of the `/app` directory which contains all the user code and run the container entrypoint as this user. - source_files: Defines how the user source files will be handled when - building the Docker image. - * INCLUDE: The files will be included in the Docker image. - * DOWNLOAD: The files will be downloaded when running the image. If - this is specified, the files must be inside a registered code - repository and the repository must have no local changes, - otherwise the build will fail. - * DOWNLOAD_OR_INCLUDE: The files will be downloaded if they're - inside a registered code repository and the repository has no - local changes, otherwise they will be included in the image. - * IGNORE: The files will not be included or downloaded in the image. - If you use this option, you're responsible that all the files - to run your steps exist in the right place. + allow_including_files_in_images: If `True`, code can be included in the + Docker images if code download from a code repository or artifact + store is disabled or not possible. + allow_download_from_code_repository: If `True`, code can be downloaded + from a code repository if possible. + allow_download_from_artifact_store: If `True`, code can be downloaded + from the artifact store. + build_options: DEPRECATED, use parent_image_build_config.build_options + instead. + dockerignore: DEPRECATED, use build_config.dockerignore instead. + copy_files: DEPRECATED/UNUSED. + copy_global_config: DEPRECATED/UNUSED. + source_files: DEPRECATED. Use allow_including_files_in_images, + allow_download_from_code_repository and + allow_download_from_artifact_store instead. 
""" parent_image: Optional[str] = None dockerfile: Optional[str] = None build_context_root: Optional[str] = None - build_options: Dict[str, Any] = {} parent_image_build_config: Optional[DockerBuildConfig] = None skip_build: bool = False prevent_build_reuse: bool = False @@ -227,65 +207,84 @@ class DockerSettings(BaseSettings): install_stack_requirements: bool = True apt_packages: List[str] = [] environment: Dict[str, Any] = {} - dockerignore: Optional[str] = None - copy_files: bool = True - copy_global_config: bool = True user: Optional[str] = None build_config: Optional[DockerBuildConfig] = None - source_files: List[SourceFileMode] = DEFAULT_SOURCE_FILE_MODE + allow_including_files_in_images: bool = True + allow_download_from_code_repository: bool = True + allow_download_from_artifact_store: bool = True + + # Deprecated attributes + build_options: Dict[str, Any] = {} + dockerignore: Optional[str] = None + copy_files: bool = True + copy_global_config: bool = True + source_files: Optional[str] = None _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes( - "copy_files", "copy_global_config" + "copy_files", "copy_global_config", "source_files" ) - @field_validator("source_files", mode="before") + @model_validator(mode="before") @classmethod - def _migrate_source_files(cls, value: Any) -> Any: + @before_validator_handler + def _migrate_source_files(cls, data: Dict[str, Any]) -> Dict[str, Any]: """Migrate old source_files values. Args: - value: The attribute value. + data: The model data. Raises: ValueError: If an invalid source file mode is specified. Returns: - The migrated value. + The migrated data. 
""" - if isinstance(value, str): - if value == "download": - new_source_files = [ - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY - ] - elif value == "download_or_include": - new_source_files = [ - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, - SourceFileMode.INCLUDE, - ] - elif value == "ignore": - new_source_files = [] - elif value == "include": - new_source_files = [SourceFileMode.INCLUDE] - else: - raise ValueError(f"Invalid source file mode `{value}`.") - - return new_source_files - - return value - - @field_validator("source_files", mode="after") - @classmethod - def _sort_source_files(cls, value: List[str]) -> List[str]: - """Sort the source files list. - - Args: - value: The attribute value. - - Returns: - The sorted value with duplicates removed. - """ - return sorted(set(value)) + source_files = data.get("source_files", None) + + if source_files is None: + return data + + replacement_attributes = [ + "allow_including_files_in_images", + "allow_download_from_code_repository", + "allow_download_from_artifact_store", + ] + if any(v in data for v in replacement_attributes): + logger.warning( + "Both `source_files` and one of %s specified for the " + "DockerSettings, ignoring the `source_files` value.", + replacement_attributes, + ) + return data + + allow_including_files_in_images = False + allow_download_from_code_repository = False + allow_download_from_artifact_store = False + + if source_files == "download": + allow_download_from_code_repository = True + elif source_files == "include": + allow_including_files_in_images = True + elif source_files == "download_or_include": + allow_including_files_in_images = True + allow_download_from_code_repository = True + elif source_files == "ignore": + pass + else: + raise ValueError(f"Invalid source file mode `{source_files}`.") + + data["allow_including_files_in_images"] = ( + allow_including_files_in_images + ) + data["allow_download_from_code_repository"] = ( + allow_download_from_code_repository + ) + 
data["allow_download_from_artifact_store"] = ( + allow_download_from_artifact_store + ) + + return data @model_validator(mode="after") def _validate_skip_build(self) -> "DockerSettings": @@ -308,7 +307,7 @@ def _validate_skip_build(self) -> "DockerSettings": return self - model_config = SettingsConfigDict( + model_config = ConfigDict( # public attributes are immutable frozen=True, # prevent extra attributes during model initialization diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 100db2dc25a..7d4bc7f2052 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -29,7 +29,6 @@ import zenml from zenml.client import Client from zenml.code_repositories import BaseCodeRepository -from zenml.config.docker_settings import SourceFileMode from zenml.io import fileio from zenml.logger import get_logger from zenml.models import ( @@ -83,19 +82,16 @@ def requires_included_code( If the deployment requires code included in the container images. """ for step in deployment.step_configurations.values(): - if step.config.docker_settings.source_files == [ - SourceFileMode.INCLUDE - ]: - return True + docker_settings = step.config.docker_settings - if ( - step.config.docker_settings.source_files - == [ - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, - SourceFileMode.INCLUDE, - ] - and not code_repository - ): + if docker_settings.allow_download_from_artifact_store: + return False + + if docker_settings.allow_download_from_code_repository: + if code_repository: + continue + + if docker_settings.allow_including_files_in_images: return True return False @@ -112,13 +108,21 @@ def requires_download_from_code_repository( Returns: If the deployment needs to download code from a code repository. 
""" - return any( - step.config.docker_settings.source_files - == [ - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY, - ] - for step in deployment.step_configurations.values() - ) + for step in deployment.step_configurations.values(): + docker_settings = step.config.docker_settings + + if docker_settings.allow_download_from_artifact_store: + return False + + if docker_settings.allow_including_files_in_images: + return False + + if docker_settings.allow_download_from_code_repository: + # The other two options are false, which means download from a + # code repo is required. + return True + + return False def code_download_possible( @@ -136,15 +140,11 @@ def code_download_possible( Whether code download is possible for the deployment. """ for step in deployment.step_configurations.values(): - if ( - SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE - in step.config.docker_settings.source_files - ): + if step.config.docker_settings.allow_download_from_artifact_store: continue if ( - SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY - in step.config.docker_settings.source_files + step.config.docker_settings.allow_download_from_code_repository and code_repository ): continue @@ -711,22 +711,19 @@ def should_upload_code( return False for step in deployment.step_configurations.values(): - source_files = step.config.docker_settings.source_files + docker_settings = step.config.docker_settings if ( code_reference - and SourceFileMode.DOWNLOAD_FROM_CODE_REPOSITORY in source_files + and docker_settings.allow_download_from_code_repository ): # No upload needed for this step continue - if SourceFileMode.DOWNLOAD_FROM_ARTIFACT_STORE in source_files: - break - else: - # Downloading code in the Docker images is prevented by Docker settings - return False + if docker_settings.allow_download_from_artifact_store: + return True - return True + return False def upload_code_if_necessary() -> str: diff --git a/tests/unit/pipelines/test_build_utils.py b/tests/unit/pipelines/test_build_utils.py index 
f8e7b785c68..263dc4012b8 100644 --- a/tests/unit/pipelines/test_build_utils.py +++ b/tests/unit/pipelines/test_build_utils.py @@ -158,9 +158,7 @@ def test_build_uses_correct_settings(mocker, empty_pipeline): # noqa: F811 """Tests that the build settings and pipeline ID get correctly forwarded.""" build_config = BuildConfiguration( key="key", - settings=DockerSettings( - source_files=["include", "download_from_code_repository"] - ), + settings=DockerSettings(allow_download_from_artifact_store=False), step_name="step_name", entrypoint="entrypoint", extra_files={"key": "value"}, @@ -426,7 +424,6 @@ def test_local_repo_verification( mocker, sample_deployment_response_model: PipelineDeploymentResponse ): """Test the local repo verification.""" - deployment = PipelineDeploymentBase( run_name_template=sample_deployment_response_model.run_name_template, pipeline_configuration=sample_deployment_response_model.pipeline_configuration, From a203f71a81dd7c815451d11c1f074981cddb403e Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 2 Aug 2024 09:30:56 +0200 Subject: [PATCH 17/23] Add code path to pipeline run for frontend --- src/zenml/models/v2/core/pipeline_run.py | 13 +++++++++++++ .../zen_stores/schemas/pipeline_run_schemas.py | 3 +++ 2 files changed, 16 insertions(+) diff --git a/src/zenml/models/v2/core/pipeline_run.py b/src/zenml/models/v2/core/pipeline_run.py index c339e6a731f..f856b4a7775 100644 --- a/src/zenml/models/v2/core/pipeline_run.py +++ b/src/zenml/models/v2/core/pipeline_run.py @@ -206,6 +206,10 @@ class PipelineRunResponseMetadata(WorkspaceScopedResponseMetadata): max_length=STR_FIELD_MAX_LENGTH, default=None, ) + code_path: Optional[str] = Field( + default=None, + title="Optional path where the code is stored in the artifact store.", + ) template_id: Optional[UUID] = Field( default=None, description="Template used for the pipeline run.", @@ -425,6 +429,15 @@ def orchestrator_run_id(self) -> Optional[str]: """ return 
self.get_metadata().orchestrator_run_id + @property + def code_path(self) -> Optional[str]: + """The `code_path` property. + + Returns: + the value of the property. + """ + return self.get_metadata().code_path + @property def template_id(self) -> Optional[UUID]: """The `template_id` property. diff --git a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py index 124cd80abe7..a7708d3ba8d 100644 --- a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py +++ b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py @@ -322,6 +322,9 @@ def to_model( client_environment=client_environment, orchestrator_environment=orchestrator_environment, orchestrator_run_id=self.orchestrator_run_id, + code_path=self.deployment.code_path + if self.deployment + else None, template_id=self.deployment.template_id if self.deployment else None, From 74a3313ca5e78a2e10ae8bcd5080e8ca39be0a93 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 2 Aug 2024 13:33:08 +0200 Subject: [PATCH 18/23] Move log --- src/zenml/new/pipelines/build_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index 7d4bc7f2052..f26e5e23c32 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -736,11 +736,11 @@ def upload_code_if_necessary() -> str: Returns: The path where to archived code is uploaded. 
""" + logger.info("Archiving code...") + code_archive = CodeArchive(root=source_utils.get_source_root()) artifact_store = Client().active_stack.artifact_store - logger.info("Archiving code...") - with tempfile.NamedTemporaryFile( mode="w+b", delete=True, suffix=".tar.gz" ) as f: From ec26a276c4274510d2150a1eed6cd0d454be4ab7 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Mon, 5 Aug 2024 14:27:25 +0200 Subject: [PATCH 19/23] Better docstring --- src/zenml/new/pipelines/code_archive.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/zenml/new/pipelines/code_archive.py b/src/zenml/new/pipelines/code_archive.py index fa68f49b939..9eba95cf06b 100644 --- a/src/zenml/new/pipelines/code_archive.py +++ b/src/zenml/new/pipelines/code_archive.py @@ -29,7 +29,12 @@ class CodeArchive(Archivable): - """Code archive.""" + """Code archive class. + + This class is used to archive user code before uploading it to the artifact + store. If the user code is stored in a Git repository, only files not + excluded by gitignores will be included in the archive. + """ def __init__(self, root: str) -> None: """Initialize the object. From 838f45f84e0a0fee520f78b7736b8168cc1327df Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Mon, 5 Aug 2024 15:49:57 +0200 Subject: [PATCH 20/23] Remove hub tests --- tests/unit/_hub/test_client.py | 60 -------------------------- tests/unit/_hub/test_utils.py | 79 ---------------------------------- 2 files changed, 139 deletions(-) delete mode 100644 tests/unit/_hub/test_client.py delete mode 100644 tests/unit/_hub/test_utils.py diff --git a/tests/unit/_hub/test_client.py b/tests/unit/_hub/test_client.py deleted file mode 100644 index 25cbe976ebc..00000000000 --- a/tests/unit/_hub/test_client.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) ZenML GmbH 2023. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. -"""Unit tests for the ZenML Hub client.""" - -from zenml._hub.client import HubClient -from zenml._hub.constants import ZENML_HUB_DEFAULT_URL -from zenml.constants import ENV_ZENML_HUB_URL - - -def test_default_url(mocker): - """Test that the default URL is set correctly.""" - client = HubClient() - assert client.url == ZENML_HUB_DEFAULT_URL - - # Pass a URL to the constructor. - client = HubClient(url="test_url") - assert client.url == "test_url" - - # Mock setting the environment variable. - mocker.patch.dict("os.environ", {ENV_ZENML_HUB_URL: "test_url"}) - client = HubClient() - assert client.url == "test_url" - - -def test_list_plugins(): - """Test listing plugins.""" - client = HubClient() - plugins = client.list_plugins() - assert len(plugins) > 0 - - -def test_get_plugin(): - """Test getting a plugin.""" - plugin_name = "langchain_qa_example" - client = HubClient() - plugin = client.get_plugin(plugin_name) - assert plugin.name == plugin_name - - # Test getting a specific version. - version = "0.1" - plugin = client.get_plugin(plugin_name, version=version) - assert plugin.name == plugin_name - assert plugin.version == version - - # Test getting a non-existent plugin. - plugin_name = "non_existent_plugin_by_aria_and_blupus" - client = HubClient() - plugin = client.get_plugin(plugin_name) - assert plugin is None diff --git a/tests/unit/_hub/test_utils.py b/tests/unit/_hub/test_utils.py deleted file mode 100644 index 8d563f08540..00000000000 --- a/tests/unit/_hub/test_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) ZenML GmbH 2023. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. -"""Unit tests for zenml._hub.utils.py""" - -import pytest - -from zenml._hub.utils import parse_plugin_name, plugin_display_name - - -@pytest.mark.parametrize( - "plugin_name, author, name, version", - [ - ("author/plugin_name:version", "author", "plugin_name", "version"), - ("author/plugin_name", "author", "plugin_name", "latest"), - ("plugin_name:version", None, "plugin_name", "version"), - ("plugin_name", None, "plugin_name", "latest"), - ], -) -def test_parse_plugin_name(plugin_name, author, name, version): - """Unit test for `parse_plugin_name`.""" - assert parse_plugin_name(plugin_name) == (author, name, version) - - # Test with different separators. 
- plugin_name_2 = name - if author: - plugin_name_2 = f"{author},{plugin_name_2}" - if version: - plugin_name_2 = f"{plugin_name_2};{version}" - author_2, name_2, version_2 = parse_plugin_name( - plugin_name_2, - author_separator=",", - version_separator=";", - ) - print(plugin_name, plugin_name_2) - print(author, author_2) - print(name, name_2) - print(version, version_2) - assert author_2 == author - assert name_2 == name - assert version_2 == version - - -@pytest.mark.parametrize( - "invalid_plugin_name", - [ - "", - "invalid/plugin/name", - "invalid:plugin:name", - ], -) -def test_parse_invalid_plugin_name(invalid_plugin_name): - """Unit test for `parse_plugin_name`.""" - with pytest.raises(ValueError): - parse_plugin_name(invalid_plugin_name) - - -@pytest.mark.parametrize( - "plugin_name, author, name, version", - [ - ("author/plugin_name:version", "author", "plugin_name", "version"), - ("author/plugin_name:latest", "author", "plugin_name", None), - ("plugin_name:version", None, "plugin_name", "version"), - ("plugin_name:latest", None, "plugin_name", None), - ], -) -def test_plugin_display_name(plugin_name, author, name, version): - """Unit test for `plugin_display_name`.""" - assert plugin_display_name(name, version, author) == plugin_name From 408c33e3aba2eb584cccb2770f3cf187a4f65cbc Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Tue, 6 Aug 2024 10:57:02 +0200 Subject: [PATCH 21/23] Try manual cleanup --- src/zenml/new/pipelines/build_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/zenml/new/pipelines/build_utils.py b/src/zenml/new/pipelines/build_utils.py index f26e5e23c32..ea9dbc7326f 100644 --- a/src/zenml/new/pipelines/build_utils.py +++ b/src/zenml/new/pipelines/build_utils.py @@ -742,7 +742,7 @@ def upload_code_if_necessary() -> str: artifact_store = Client().active_stack.artifact_store with tempfile.NamedTemporaryFile( - mode="w+b", delete=True, suffix=".tar.gz" + mode="w+b", delete=False, suffix=".tar.gz" ) 
as f: code_archive.write_archive(f) @@ -773,4 +773,7 @@ def upload_code_if_necessary() -> str: "Code already exists in artifact store, skipping upload." ) + if os.path.exists(f.name): + os.remove(f.name) + return upload_path From 8c8f3baaaebe1df05e01853a15313b6339ecc3a3 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Tue, 6 Aug 2024 11:45:09 +0200 Subject: [PATCH 22/23] Docs --- .../which-files-are-built-into-the-image.md | 21 ++++++++++++------- .../autogenerate-a-template-yaml-file.md | 15 ++++++++++--- docs/book/toc.md | 2 +- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md b/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md index f2666e169d6..e8ae9078d34 100644 --- a/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md +++ b/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md @@ -2,17 +2,23 @@ ZenML determines the root directory of your source files in the following order: -* If you've initialized zenml (`zenml init`), the repository root directory will be used. +* If you've initialized zenml (`zenml init`) in your current working directory or one of its parent directories, the repository root directory will be used. * Otherwise, the parent directory of the Python file you're executing will be the source root. For example, running `python /path/to/file.py`, the source root would be `/path/to`. 
-You can specify how the files inside this root directory are handled using the `source_files` attribute on the [DockerSettings](https://sdkdocs.zenml.io/latest/core_code_docs/core-config/#zenml.config.docker_settings.DockerSettings): +You can specify how the files inside this root directory are handled using the following three attributes on the [DockerSettings](https://sdkdocs.zenml.io/latest/core_code_docs/core-config/#zenml.config.docker_settings.DockerSettings): +* `allow_download_from_code_repository`: If this is set to `True` and your files are inside a registered [code repository](../setting-up-a-project-repository/connect-your-git-repository.md) and the repository has no local changes, the files will be downloaded from the code repository and not included in the image. +* `allow_download_from_artifact_store`: If the previous option is disabled or no code repository without local changes exists for the root directory, ZenML will archive and upload your code to the artifact store if this is set to `True`. +* `allow_including_files_in_images`: If both previous options were disabled or not possible, ZenML will include your files in the Docker image if this option is enabled. This means a new Docker image has to be built each time you modify one of your code files. -* The default behavior `download_or_include`: The files will be downloaded if they're inside a registered [code repository](../setting-up-a-project-repository/connect-your-git-repository.md) and the repository has no local changes, otherwise, they will be included in the image. -* If you want your files to be included in the image in any case, set the `source_files` attribute to `include`. -* If you want your files to be downloaded in any case, set the `source_files` attribute to `download`. If this is specified, the files must be inside a registered code repository and the repository must have no local changes, otherwise the Docker build will fail. 
-* If you want to prevent ZenML from copying or downloading any of your source files, you can do so by setting the `source_files` attribute on the Docker settings to `ignore`. This is an advanced feature and will most likely cause unintended and unanticipated behavior when running your pipelines. If you use this, make sure to copy all the necessary files to the correct paths yourself. +{% hint style="warning" %} +Setting all of the above attributes to `False` is not recommended and will most likely cause unintended and unanticipated behavior when running your pipelines. If you do this, you're responsible that all your files are at the correct paths in the Docker images that will be used to run your pipeline steps. +{% endhint %} -**Which files get included** +## Control which files get downloaded + +When downloading files either from a code repository or the artifact store, ZenML downloads all contents of the root directory into the Docker container. To exclude files, track your code in a Git repository use a [gitignore](https://git-scm.com/docs/gitignore/en) to specify which files should be excluded. + +## Control which files get included When including files in the image, ZenML copies all contents of the root directory into the Docker image. To exclude files and keep the image smaller, use a [.dockerignore file](https://docs.docker.com/engine/reference/builder/#dockerignore-file) in either of the following ways: @@ -26,6 +32,7 @@ When including files in the image, ZenML copies all contents of the root directo def my_pipeline(...): ... ``` +
ZenML Scarf
diff --git a/docs/book/how-to/use-configuration-files/autogenerate-a-template-yaml-file.md b/docs/book/how-to/use-configuration-files/autogenerate-a-template-yaml-file.md index b073e03ad95..830b8d3318f 100644 --- a/docs/book/how-to/use-configuration-files/autogenerate-a-template-yaml-file.md +++ b/docs/book/how-to/use-configuration-files/autogenerate-a-template-yaml-file.md @@ -74,7 +74,10 @@ settings: required_integrations: List[str] requirements: Union[NoneType, str, List[str]] skip_build: bool - source_files: SourceFileMode + prevent_build_reuse: bool + allow_including_files_in_images: bool + allow_download_from_code_repository: bool + allow_download_from_artifact_store: bool target_repository: str user: Optional[str] resources: @@ -135,7 +138,10 @@ steps: required_integrations: List[str] requirements: Union[NoneType, str, List[str]] skip_build: bool - source_files: SourceFileMode + prevent_build_reuse: bool + allow_including_files_in_images: bool + allow_download_from_code_repository: bool + allow_download_from_artifact_store: bool target_repository: str user: Optional[str] resources: @@ -194,7 +200,10 @@ steps: required_integrations: List[str] requirements: Union[NoneType, str, List[str]] skip_build: bool - source_files: SourceFileMode + prevent_build_reuse: bool + allow_including_files_in_images: bool + allow_download_from_code_repository: bool + allow_download_from_artifact_store: bool target_repository: str user: Optional[str] resources: diff --git a/docs/book/toc.md b/docs/book/toc.md index 174c622a931..5cc0e6c3af8 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -87,7 +87,7 @@ * [Trigger a pipeline from Python Client](how-to/trigger-pipelines/trigger-a-pipeline-from-client.md) * [Trigger a pipeline from another pipeline](how-to/trigger-pipelines/trigger-a-pipeline-from-another.md) * [Trigger a pipeline from REST API](how-to/trigger-pipelines/trigger-a-pipeline-from-rest-api.md) -* [🚨 Create and run 
templates](how-to/create-and-run-templates/README.md) +* [▶️ Create and run templates](how-to/create-and-run-templates/README.md) * [Create a run template](how-to/create-and-run-templates/create-a-run-template.md) * [Run a template](how-to/create-and-run-templates/run-a-template.md) * [📃 Use configuration files](how-to/use-configuration-files/README.md) From 0c2df924f1c864995a4436dc31db57b09d79b14d Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Wed, 7 Aug 2024 10:29:45 +0200 Subject: [PATCH 23/23] Fix alembic order --- .../migrations/versions/026d4577b6a0_add_code_path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py b/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py index 1111b89477b..ffe82454461 100644 --- a/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py +++ b/src/zenml/zen_stores/migrations/versions/026d4577b6a0_add_code_path.py @@ -1,7 +1,7 @@ """Add code path [026d4577b6a0]. Revision ID: 026d4577b6a0 -Revises: 0.63.0 +Revises: 909550c7c4da Create Date: 2024-07-30 16:53:32.777594 """ @@ -12,7 +12,7 @@ # revision identifiers, used by Alembic. revision = "026d4577b6a0" -down_revision = "0.63.0" +down_revision = "909550c7c4da" branch_labels = None depends_on = None