diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py
index 6dd8839bf..0b80441e1 100644
--- a/repo2docker/buildpacks/base.py
+++ b/repo2docker/buildpacks/base.py
@@ -85,10 +85,15 @@
 {{sd}}
 {% endfor %}
 
-# Copy and chown stuff. This doubles the size of the repo, because
-# you can't actually copy as USER, only as root! Thanks, Docker!
+# FIXME: use COPY --chown with docker 17.09 to avoid copy+chown in two steps
 USER root
+{% if assemble_from_subset -%}
+{% for f in assemble_files %}
+COPY src/{{ f }} ${HOME}/{{ f }}
+{% endfor %}
+{% else -%}
 COPY src/ ${HOME}
+{% endif -%}
 RUN chown -R ${NB_USER}:${NB_USER} ${HOME}
 
 # Run assemble scripts! These will actually build the specification
@@ -97,6 +102,13 @@
 {{ sd }}
 {% endfor %}
 
+{% if assemble_from_subset -%}
+# Load the rest of the repo after assembling the environment
+USER root
+COPY src/ ${HOME}
+RUN chown -R ${NB_USER}:${NB_USER} ${HOME}
+{% endif -%}
+
 # Container image Labels!
 # Put these at the end, since we don't want to rebuild everything
 # when these change! Did I mention I hate Dockerfile cache semantics?
@@ -225,6 +237,28 @@ def get_build_scripts(self):
         """
         return []
 
+    def get_assemble_files(self):
+        """
+        Ordered list of files required to run assemble scripts
+
+        This should be the subset of files in the repository
+        that are needed to run the assembly scripts.
+
+        If the scripts can be run with a subset of files,
+        then only these files will be present when the scripts run
+        and the rest of the repository will be loaded after
+        running the scripts (for better caching).
+        Otherwise, the entire repository will be present.
+
+        Only used if assemble_from_subset=True,
+        which is not the default.
+        """
+        return []
+
+    # whether I can be assembled with a subset of files
+    # change in subclasses that are sure that they can do this
+    assemble_from_subset = False
+
     def get_assemble_scripts(self):
         """
         Ordered list of shell script snippets to build the repo into the image.
@@ -238,14 +272,6 @@ def get_assemble_scripts(self):
         the scripts that actually build the repository into the container
         image.
 
-        If this needs to be dynamically determined (based on the presence
-        or absence of certain files, for example), you can create any
-        method and decorate it with `traitlets.default('assemble_scripts)`
-        and the return value of this method is used as the value of
-        assemble_scripts. You can expect that the script is running in
-        the current directory of the repository being built when doing
-        dynamic detection.
-
         You can use environment variable substitutions in both the
         username and the execution script.
         """
@@ -304,11 +330,13 @@ def render(self):
             path=self.get_path(),
             env=self.get_env(),
             labels=self.get_labels(),
             build_script_directives=build_script_directives,
             assemble_script_directives=assemble_script_directives,
+            assemble_files=self.get_assemble_files(),
+            assemble_from_subset=self.assemble_from_subset,
             build_script_files=self.get_build_script_files(),
             base_packages=sorted(self.get_base_packages()),
             post_build_scripts=self.get_post_build_scripts(),
         )
 
     def build(self, image_spec, memory_limit, build_args):
@@ -375,6 +403,14 @@ def get_env(self):
     def detect(self):
         return True
 
+    def get_assemble_files(self):
+        """Return a list holding the repo's apt.txt path, if that file exists."""
+        apt_txt = self.binder_path('apt.txt')
+        files = []
+        if os.path.exists(apt_txt):
+            files.append(apt_txt)
+        return files
+
     def get_assemble_scripts(self):
         assemble_scripts = []
         try:
diff --git a/repo2docker/buildpacks/conda/__init__.py b/repo2docker/buildpacks/conda/__init__.py
index 358ad580f..21ed51c7e 100644
--- a/repo2docker/buildpacks/conda/__init__.py
+++ b/repo2docker/buildpacks/conda/__init__.py
@@ -16,6 +16,10 @@
 
 
 class CondaBuildPack(BaseImage):
+
+    # conda envs can be installed with a subset of files
+    assemble_from_subset = True
+
     def get_env(self):
         return super().get_env() + [
             ('CONDA_DIR', '${APP_BASE}/conda'),
@@ -109,6 +113,18 @@ def py2(self):
         """Am I building a Python 2 kernel environment?"""
         return self.python_version and self.python_version.split('.')[0] == '2'
 
+    def get_assemble_files(self):
+        """Specify that assembly only requires environment.yml
+
+        enables caching assembly result even when
+        repo contents change
+        """
+        assemble_files = super().get_assemble_files()
+        environment_yml = self.binder_path('environment.yml')
+        if os.path.exists(environment_yml):
+            assemble_files.append(environment_yml)
+        return assemble_files
+
     def get_assemble_scripts(self):
         assembly_scripts = []
         environment_yml = self.binder_path('environment.yml')