Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add assemble_subset #410

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions repo2docker/buildpacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,15 @@
{{sd}}
{% endfor %}

# Copy and chown stuff. This doubles the size of the repo, because
# you can't actually copy as USER, only as root! Thanks, Docker!
# FIXME: use COPY --chown with docker 17.09 to avoid copy+chown in two steps
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this to above L105

{% if assemble_from_subset -%}
{% for f in assemble_files %}
COPY src/{{ f }} ${HOME}/{{ f }}
{% endfor %}
{% else -%}
USER root
COPY src/ ${HOME}
{% endif -%}
RUN chown -R ${NB_USER}:${NB_USER} ${HOME}

{% if env -%}
Expand All @@ -113,6 +118,13 @@
{{ sd }}
{% endfor %}

{% if assemble_from_subset -%}
# Load the rest of the repo after assembling the environment
USER root
COPY src/ ${HOME}
RUN chown -R ${NB_USER}:${NB_USER} ${HOME}
{% endif -%}

# Container image Labels!
# Put these at the end, since we don't want to rebuild everything
# when these change! Did I mention I hate Dockerfile cache semantics?
Expand Down Expand Up @@ -301,6 +313,27 @@ def get_build_scripts(self):
"""
return []

def get_assemble_files(self):
"""
Ordered list of files required to run assemble scripts

This should be the subset of files in the repository
that are needed to run the assembly scripts.

If the scripts can be run with a subset of files,
then only these files will be present when the scripts run
and the rest of the repository will be loaded after
running the scripts (for better caching).
Otherwise, the entire repository will be present.

Only used if assemble_from_subset=True,
which is not the default.
"""

# whether I can be assembled with a subset of files
# change in subclasses that are sure that they can do this
assemble_from_subset = False

def get_assemble_scripts(self):
"""
Ordered list of shell script snippets to build the repo into the image.
Expand All @@ -314,14 +347,6 @@ def get_assemble_scripts(self):
the scripts that actually build the repository into the container
image.

If this needs to be dynamically determined (based on the presence
or absence of certain files, for example), you can create any
method and decorate it with `traitlets.default('assemble_scripts)`
and the return value of this method is used as the value of
assemble_scripts. You can expect that the script is running in
the current directory of the repository being built when doing
dynamic detection.

You can use environment variable substitutions in both the
username and the execution script.
"""
Expand Down Expand Up @@ -396,8 +421,10 @@ def render(self):
build_env=self.get_build_env(),
env=self.get_env(),
labels=self.get_labels(),
build_script_directives=build_script_directives,
assemble_script_directives=assemble_script_directives,
assemble_files=self.get_assemble_files(),
assemble_from_subset=self.assemble_from_subset,
build_script_directives=build_script_directives,
build_script_files=self.get_build_script_files(),
base_packages=sorted(self.get_base_packages()),
post_build_scripts=self.get_post_build_scripts(),
Expand Down Expand Up @@ -489,6 +516,13 @@ def get_env(self):
def detect(self):
return True

def get_assemble_files(self):
apt_txt = self.binder_path('apt.txt')
files = []
if os.path.exists(apt_txt):
files.append(apt_txt)
return files

def get_assemble_scripts(self):
assemble_scripts = []
try:
Expand Down
16 changes: 16 additions & 0 deletions repo2docker/buildpacks/conda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class CondaBuildPack(BaseImage):
Uses miniconda since it is more lightweight than Anaconda.

"""

# conda envs can be installed with a subset of files
assemble_with_subset = True

def get_build_env(self):
"""Return environment variables to be set.

Expand Down Expand Up @@ -169,6 +173,18 @@ def py2(self):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split('.')[0] == '2'

def get_assemble_files(self):
"""Specify that assembly only requires environment.yml

enables caching assembly result even when
repo contents change
"""
assemble_files = super().get_assemble_files()
environment_yml = self.binder_path('environment.yml')
if os.path.exists(environment_yml):
assemble_files.append(environment_yml)
return assemble_files

def get_assemble_scripts(self):
"""Return series of build-steps specific to this source repository.
"""
Expand Down
2 changes: 2 additions & 0 deletions repo2docker/buildpacks/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class DockerBuildPack(BuildPack):
"""Docker BuildPack"""
dockerfile = "Dockerfile"

assemble_from_subset = False

def detect(self):
"""Check if current repo should be built with the Docker BuildPack"""
return os.path.exists(self.binder_path('Dockerfile'))
Expand Down
12 changes: 12 additions & 0 deletions repo2docker/buildpacks/julia.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ class JuliaBuildPack(CondaBuildPack):
See https://github.com/JuliaPy/PyCall.jl/issues/410

"""

# REQUIRE can't refer to local files, can it?
assemble_from_subset = True
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth checking with #393 and the new way of specifying dependencies in Julia.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I think it will affect Julia with the project stuff.


def get_build_env(self):
"""Get additional environment settings for Julia and Jupyter

Expand Down Expand Up @@ -86,6 +90,14 @@ def get_build_scripts(self):
)
]

def get_assemble_files(self):
"""Add environment files required for assembly step"""
assemble_files = super().get_assemble_files()
require = self.binder_path('REQUIRE')
if os.path.exists(require):
assemble_files.append(require)
return assemble_files

def get_assemble_scripts(self):
"""
Return series of build-steps specific to "this" Julia repository
Expand Down
56 changes: 56 additions & 0 deletions repo2docker/buildpacks/python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,62 @@ def python_version(self):
self._python_version = py_version
return self._python_version

def get_assemble_files(self):
assemble_files = super().get_assemble_files()
for name in ('requirements.txt', 'requirements3.txt'):
requirements_txt = self.binder_path(name)
if os.path.exists(requirements_txt):
assemble_files.append(requirements_txt)
return assemble_files

def _is_local_requirement(self, line):
"""Return whether a line in a requirements.txt file references a local file"""
# trim comments and skip empty lines
line = line.split('#', 1)[0].strip()
if not line:
return False
if line.startswith(('-r', '-c')):
# local -r or -c references break isolation
return True
# strip off `-e, etc.`
if line.startswith('-'):
line = line.split(None, 1)[1]
if 'file://' in line:
# file references break isolation
return True
if '://' in line:
# handle git://../local/file
path = line.split('://', 1)[1]
else:
path = line
if path.startswith('.'):
# references a local file
return True
return False

@property
def assemble_from_subset(self):
"""Peek in requirements.txt to determine if we can assemble from only env files

If there are any local references, e.g. `-e .`,
stage the whole repo prior to installation.
"""
if not os.path.exists('binder') and os.path.exists('setup.py'):
# can't install from subset if we're using setup.py
return False
for name in ('requirements.txt', 'requirements3.txt'):
requirements_txt = self.binder_path(name)
if not os.path.exists(requirements_txt):
continue
with open(requirements_txt) as f:
for line in f:
if self._is_local_requirement(line):
return False

# didn't find any local references,
# allow assembly from subset
return True

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
"""
Expand Down
5 changes: 5 additions & 0 deletions repo2docker/buildpacks/r.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class RBuildPack(PythonBuildPack):
The `r-base` package from Ubuntu apt repositories is used to install
R itself, rather than any of the methods from https://cran.r-project.org/.
"""

# cannot assemble from subset because INSTALL.r could
# refer to local files (right?)
assemble_from_subset = False

@property
def runtime(self):
"""
Expand Down