Skip to content

Commit

Permalink
add assemble_subset
Browse files Browse the repository at this point in the history
- get_assemble_files() returns list of files needed for assembly
- if buildpack.assemble_from_subset is set, only load assemble_files
  prior to running the assembly scripts, then load the rest of the repo afterward

conda opts in to this, but currently I think this works
for everything *except* requirements.txt
  • Loading branch information
minrk committed Feb 9, 2018
1 parent faf293e commit c16e995
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 11 deletions.
57 changes: 46 additions & 11 deletions repo2docker/buildpacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,15 @@
{{sd}}
{% endfor %}
# Copy and chown stuff. This doubles the size of the repo, because
# you can't actually copy as USER, only as root! Thanks, Docker!
# FIXME: use COPY --chown with docker 17.09 to avoid copy+chown in two steps
# Switch to root on both branches: the chown below needs it, and the
# USER left active by the preceding directives may not be root.
USER root
{% if assemble_from_subset -%}
{% for f in assemble_files %}
COPY src/{{ f }} ${HOME}/{{ f }}
{% endfor %}
{% else -%}
COPY src/ ${HOME}
{% endif -%}
RUN chown -R ${NB_USER}:${NB_USER} ${HOME}
# Run assemble scripts! These will actually build the specification
Expand All @@ -97,6 +102,13 @@
{{ sd }}
{% endfor %}
{% if assemble_from_subset -%}
# Load the rest of the repo after assembling the environment
USER root
COPY src/ ${HOME}
RUN chown -R ${NB_USER}:${NB_USER} ${HOME}
{% endif -%}
# Container image Labels!
# Put these at the end, since we don't want to rebuild everything
# when these change! Did I mention I hate Dockerfile cache semantics?
Expand Down Expand Up @@ -225,6 +237,27 @@ def get_build_scripts(self):
"""
return []

def get_assemble_files(self):
    """
    Ordered list of files required to run assemble scripts.

    This should be the subset of files in the repository
    that are needed to run the assembly scripts.

    If the scripts can be run with a subset of files,
    then only these files will be present when the scripts run
    and the rest of the repository will be loaded after
    running the scripts (for better caching).
    Otherwise, the entire repository will be present.

    Only used if assemble_from_subset=True,
    which is not the default.

    Returns:
        A new, empty list. A fresh list is returned on every call so
        that subclasses can safely append to the result of
        ``super().get_assemble_files()``.
    """
    # Return a real list rather than falling through to None: the
    # Dockerfile template iterates over this value and subclasses
    # append to it.
    return []

# Whether this build pack can be assembled with only a subset of the
# repository present (the files returned by get_assemble_files()).
# Defaults to False; subclasses that are sure their assemble scripts
# only need those files should set it to True.
assemble_from_subset = False

def get_assemble_scripts(self):
"""
Ordered list of shell script snippets to build the repo into the image.
Expand All @@ -238,14 +271,6 @@ def get_assemble_scripts(self):
the scripts that actually build the repository into the container
image.
If this needs to be dynamically determined (based on the presence
or absence of certain files, for example), you can create any
method and decorate it with `traitlets.default('assemble_scripts)`
and the return value of this method is used as the value of
assemble_scripts. You can expect that the script is running in
the current directory of the repository being built when doing
dynamic detection.
You can use environment variable substitutions in both the
username and the execution script.
"""
Expand Down Expand Up @@ -304,11 +329,14 @@ def render(self):
path=self.get_path(),
env=self.get_env(),
labels=self.get_labels(),
build_script_directives=build_script_directives,
assemble_script_directives=assemble_script_directives,
assemble_files=self.get_assemble_files(),
assemble_from_subset=self.assemble_from_subset,
build_script_directives=build_script_directives,
build_script_files=self.get_build_script_files(),
base_packages=sorted(self.get_base_packages()),
post_build_scripts=self.get_post_build_scripts(),

)

def build(self, image_spec, memory_limit, build_args):
Expand Down Expand Up @@ -375,6 +403,13 @@ def get_env(self):
def detect(self):
    """Report whether this build pack applies; unconditionally True."""
    return True

def get_assemble_files(self):
    """Return the files the assemble scripts need from the repository.

    The result holds the path that ``self.binder_path('apt.txt')``
    resolves to when that path exists, and is empty otherwise.
    """
    candidate = self.binder_path('apt.txt')
    return [candidate] if os.path.exists(candidate) else []

def get_assemble_scripts(self):
assemble_scripts = []
try:
Expand Down
16 changes: 16 additions & 0 deletions repo2docker/buildpacks/conda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@


class CondaBuildPack(BaseImage):

# conda envs can be installed with a subset of files.
# The opt-in flag has to be called assemble_from_subset: that is the
# attribute the base build pack declares and the one render() passes
# to the Dockerfile template.
assemble_from_subset = True
# Previously used (misnamed) attribute, kept as an alias so any
# existing reference to it keeps working.
assemble_with_subset = assemble_from_subset

def get_env(self):
return super().get_env() + [
('CONDA_DIR', '${APP_BASE}/conda'),
Expand Down Expand Up @@ -109,6 +113,18 @@ def py2(self):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split('.')[0] == '2'

def get_assemble_files(self):
    """List the files needed to assemble the conda environment.

    Starts from the parent's list and adds the path that
    ``self.binder_path('environment.yml')`` resolves to when it exists.
    Keeping this list small lets the assembly result stay cached even
    when other repo contents change.
    """
    required = super().get_assemble_files()
    env_spec = self.binder_path('environment.yml')
    if not os.path.exists(env_spec):
        return required
    required.append(env_spec)
    return required

def get_assemble_scripts(self):
assembly_scripts = []
environment_yml = self.binder_path('environment.yml')
Expand Down

0 comments on commit c16e995

Please sign in to comment.