Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] preassembly for conda/python #743

Merged
merged 12 commits into from
Jul 19, 2019
14 changes: 7 additions & 7 deletions repo2docker/buildpacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,12 +711,8 @@ def get_preassemble_scripts(self):
except FileNotFoundError:
pass

return scripts

def get_assemble_scripts(self):
assemble_scripts = []
if "py" in self.stencila_contexts:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -728,7 +724,7 @@ def get_assemble_scripts(self):
]
)
if self.stencila_manifest_dir:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -741,7 +737,11 @@ def get_assemble_scripts(self):
)
]
)
return assemble_scripts
return scripts

def get_assemble_scripts(self):
    """Return the directives to run once the entire repository is in the image.

    This implementation contributes no directives, so the result is always
    a new empty list.
    """
    return list()

def get_post_build_scripts(self):
post_build = self.binder_path("postBuild")
Expand Down
16 changes: 14 additions & 2 deletions repo2docker/buildpacks/conda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,19 @@ def py2(self):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split(".")[0] == "2"

def get_assemble_scripts(self):
def get_preassemble_script_files(self):
"""Return the files needed for preassembly, adding environment.yml if present.

Starts from the mapping returned by the parent implementation and, when
the path given by ``self.binder_path("environment.yml")`` exists, adds it
mapped to itself.

Preassembly only requires environment.yml, which enables caching the
assembly result even when the rest of the repo contents change.
"""
assemble_files = super().get_preassemble_script_files()
environment_yml = self.binder_path("environment.yml")
# only register the file when it actually exists
if os.path.exists(environment_yml):
assemble_files[environment_yml] = environment_yml
betatim marked this conversation as resolved.
Show resolved Hide resolved
return assemble_files

def get_preassemble_scripts(self):
"""Return series of build-steps specific to this source repository.
"""
assembly_scripts = []
Expand All @@ -197,7 +209,7 @@ def get_assemble_scripts(self):
),
)
)
return super().get_assemble_scripts() + assembly_scripts
return super().get_preassemble_scripts() + assembly_scripts

def detect(self):
"""Check if current repo should be built with the Conda BuildPack.
Expand Down
11 changes: 10 additions & 1 deletion repo2docker/buildpacks/pipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,16 @@ def python_version(self):
self._python_version = self.major_pythons["3"]
return self._python_version

def get_assemble_scripts(self):
def get_preassemble_script_files(self):
    """Collect the files that preassembly needs.

    Extends the mapping from the parent implementation with whichever of
    requirements3.txt, Pipfile and Pipfile.lock exist at the location given
    by ``self.binder_path``; each existing path is mapped to itself.
    """
    collected = super().get_preassemble_script_files()
    candidate_names = ("requirements3.txt", "Pipfile", "Pipfile.lock")
    # resolve each name lazily and keep only the paths present on disk
    for existing in filter(os.path.exists, map(self.binder_path, candidate_names)):
        collected[existing] = existing
    return collected

def get_preassemble_scripts(self):
"""Return series of build-steps specific to this repository.
"""
# If we have either Pipfile.lock, Pipfile, or runtime.txt declare the
Expand Down
96 changes: 86 additions & 10 deletions repo2docker/buildpacks/python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,47 @@ def python_version(self):
self._python_version = py_version
return self._python_version

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
def _is_local_requirement(self, line):
    """Return whether a line in a requirements.txt file references a local file.

    Parameters
    ----------
    line : str
        A single raw line from a requirements file.

    Returns
    -------
    bool
        True when the line pulls in another requirements/constraints file
        (``-r``/``-c``/``--requirement``/``--constraint``), contains a
        ``file://`` URL, or names a path starting with ``.``.
        False for blank lines, comments, option-only lines such as
        ``--pre``, and everything else.
    """
    # trim comments and skip empty lines
    line = line.split("#", 1)[0].strip()
    if not line:
        return False
    if line.startswith(("-r", "-c", "--requirement", "--constraint")):
        # local -r or -c references (short or long form) break isolation
        return True
    if line.startswith("-"):
        # strip off the option itself (`-e`, `--editable`, ...) and keep
        # only its value, which may follow whitespace or `=`
        option, *value = line.split(None, 1)
        if value:
            line = value[0]
        elif option.startswith("--") and "=" in option:
            line = option.split("=", 1)[1]
        else:
            # a bare flag such as `--pre` has no argument,
            # so it cannot reference a local file
            return False
    if "file://" in line:
        # file references break isolation
        return True
    if "://" in line:
        # handle git://../local/file
        path = line.split("://", 1)[1]
    else:
        path = line
    # a leading dot references a local file or directory
    return path.startswith(".")

def _get_pip_scripts(self):
"""Get pip install scripts

added to preassemble unless local references are found,
in which case this happens in assemble.
"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
scripts = []
if self.py2:
# using python 2 kernel,
# requirements3.txt allows installation in the notebook server env
nb_requirements_file = self.binder_path("requirements3.txt")
if os.path.exists(nb_requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
# want the $NB_PYTHON_PREFIX environment variable, not for
Expand All @@ -65,12 +88,65 @@ def get_assemble_scripts(self):
# install requirements.txt in the kernel env
requirements_file = self.binder_path("requirements.txt")
if os.path.exists(requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
'{} install --no-cache-dir -r "{}"'.format(pip, requirements_file),
)
)
return scripts

@property
def _should_preassemble_pip(self):
    """Whether pip requirements can be installed from the env files alone.

    Peeks into requirements.txt and requirements3.txt. If either contains a
    local reference (e.g. `-e .`), or if a setup.py is present while no
    `binder` directory exists, the whole repo has to be staged before
    installation, so this is False. Otherwise it is True.
    """
    has_setup_py = os.path.exists("setup.py")
    if has_setup_py and not os.path.exists("binder"):
        # installing via setup.py needs the full repo, not a subset
        return False

    for filename in ("requirements.txt", "requirements3.txt"):
        candidate = self.binder_path(filename)
        if os.path.exists(candidate):
            with open(candidate) as handle:
                # any local reference forces assembly from the full repo
                if any(self._is_local_requirement(entry) for entry in handle):
                    return False

    # no local references found: assembling from the subset is fine
    return True

def get_preassemble_script_files(self):
    """Return the files needed for preassembly, including pip requirements.

    Takes the mapping from the parent implementation and adds
    requirements.txt and requirements3.txt (as located by
    ``self.binder_path``) when they exist, each mapped to itself.
    """
    files = super().get_preassemble_script_files()
    paths = (
        self.binder_path(filename)
        for filename in ("requirements.txt", "requirements3.txt")
    )
    for path in paths:
        if not os.path.exists(path):
            continue
        files[path] = path
    return files

def get_preassemble_scripts(self):
    """Return the scripts to run before the full repository is added.

    The parent's scripts come first; the pip install scripts are appended
    only when `_should_preassemble_pip` says they can run from the
    environment files alone.
    """
    preassemble = super().get_preassemble_scripts()
    if not self._should_preassemble_pip:
        return preassemble
    preassemble.extend(self._get_pip_scripts())
    return preassemble

def get_assemble_scripts(self):
"""Return series of build steps that require the full repository"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
if not self._should_preassemble_pip:
assemble_scripts.extend(self._get_pip_scripts())

# setup.py exists *and* binder dir is not used
if not self.binder_dir and os.path.exists(setup_py):
Expand Down