Skip to content

Commit

Permalink
Merge pull request jupyterhub#743 from minrk/conda-env-first-again-again
Browse files Browse the repository at this point in the history
[MRG] preassembly for conda/python
  • Loading branch information
betatim authored Jul 19, 2019
2 parents 4d3b60e + 3aa7e39 commit 231a2d8
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 49 deletions.
19 changes: 12 additions & 7 deletions repo2docker/buildpacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@
{% endfor -%}
{% endif -%}
{% if preassemble_script_directives -%}
USER root
RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR}
{% endif -%}
{% for sd in preassemble_script_directives -%}
{{ sd }}
{% endfor %}
Expand Down Expand Up @@ -711,12 +716,8 @@ def get_preassemble_scripts(self):
except FileNotFoundError:
pass

return scripts

def get_assemble_scripts(self):
assemble_scripts = []
if "py" in self.stencila_contexts:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -728,7 +729,7 @@ def get_assemble_scripts(self):
]
)
if self.stencila_manifest_dir:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
Expand All @@ -741,7 +742,11 @@ def get_assemble_scripts(self):
)
]
)
return assemble_scripts
return scripts

def get_assemble_scripts(self):
    """Return directives to run after the entire repository has been added to the image"""
    # base image contributes nothing at the assemble stage;
    # subclasses extend this list
    scripts = []
    return scripts

def get_post_build_scripts(self):
post_build = self.binder_path("postBuild")
Expand Down
110 changes: 83 additions & 27 deletions repo2docker/buildpacks/conda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ruamel.yaml import YAML

from ..base import BaseImage
from ...utils import is_local_pip_requirement

# pattern for parsing conda dependency line
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
Expand Down Expand Up @@ -127,6 +128,50 @@ def get_build_script_files(self):
files.update(super().get_build_script_files())
return files

# cache for the parsed environment.yml (None = not loaded yet)
_environment_yaml = None

@property
def environment_yaml(self):
    """Parsed contents of environment.yml, cached after the first load.

    Returns an empty dict when no environment.yml exists.
    Raises TypeError when the file's top level is not a mapping.
    """
    if self._environment_yaml is None:
        env_path = self.binder_path("environment.yml")
        if not os.path.exists(env_path):
            self._environment_yaml = {}
        else:
            with open(env_path) as f:
                parsed = YAML().load(f)
            # an empty file parses to None; normalize to an empty dict
            if parsed is None:
                parsed = {}
            # anything other than a mapping (e.g. a bare list) is invalid
            if not isinstance(parsed, Mapping):
                raise TypeError(
                    "environment.yml should contain a dictionary. Got %r" % type(parsed)
                )
            self._environment_yaml = parsed
    return self._environment_yaml

@property
def _should_preassemble_env(self):
    """Check for local pip requirements in environment.yml

    If any pip dependency references a local file (e.g. `-e .`),
    the whole repo must be staged prior to installation, so
    preassembly from environment.yml alone is not possible.
    """
    dependencies = self.environment_yaml.get("dependencies", [])
    for dep in dependencies:
        # pip requirements appear as a nested dict: {"pip": [...]}
        if not isinstance(dep, dict):
            continue
        pip_requirements = dep.get("pip")
        if not isinstance(pip_requirements, list):
            continue
        # fix: inspect every pip section as it is found, not just the
        # last one — previously earlier sections were silently ignored
        for line in pip_requirements:
            if is_local_pip_requirement(line):
                return False
    return True

@property
def python_version(self):
"""Detect the Python version for a given `environment.yml`
Expand All @@ -135,31 +180,17 @@ def python_version(self):
or a Falsy empty string '' if not found.
"""
environment_yml = self.binder_path("environment.yml")
if not os.path.exists(environment_yml):
return ""

if not hasattr(self, "_python_version"):
py_version = None
with open(environment_yml) as f:
env = YAML().load(f)
# check if the env file is empty, if so instantiate an empty dictionary.
if env is None:
env = {}
# check if the env file provided a dict-like thing not a list or other data structure.
if not isinstance(env, Mapping):
raise TypeError(
"environment.yml should contain a dictionary. Got %r"
% type(env)
)
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break
env = self.environment_yaml
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break

# extract major.minor
if py_version:
Expand All @@ -178,14 +209,27 @@ def py2(self):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split(".")[0] == "2"

def get_assemble_scripts(self):
def get_preassemble_script_files(self):
    """preassembly only requires environment.yml

    enables caching the assembly result even when
    other repo contents change
    """
    files = super().get_preassemble_script_files()
    env_path = self.binder_path("environment.yml")
    # only stage environment.yml when env-only preassembly is possible
    if self._should_preassemble_env and os.path.exists(env_path):
        files[env_path] = env_path
    return files

def get_env_scripts(self):
"""Return series of build-steps specific to this source repository.
"""
assembly_scripts = []
scripts = []
environment_yml = self.binder_path("environment.yml")
env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
if os.path.exists(environment_yml):
assembly_scripts.append(
scripts.append(
(
"${NB_USER}",
r"""
Expand All @@ -197,7 +241,19 @@ def get_assemble_scripts(self):
),
)
)
return super().get_assemble_scripts() + assembly_scripts
return scripts

def get_preassemble_scripts(self):
    """Scripts to run before the full repo contents are added."""
    scripts = super().get_preassemble_scripts()
    if not self._should_preassemble_env:
        # env creation must wait until the repo is staged
        return scripts
    return scripts + self.get_env_scripts()

def get_assemble_scripts(self):
    """Scripts that run after the whole repository has been staged."""
    scripts = super().get_assemble_scripts()
    if self._should_preassemble_env:
        # env was already created during preassembly
        return scripts
    return scripts + self.get_env_scripts()

def detect(self):
"""Check if current repo should be built with the Conda BuildPack.
Expand Down
23 changes: 18 additions & 5 deletions repo2docker/buildpacks/pipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@ def python_version(self):
self._python_version = self.major_pythons["3"]
return self._python_version

def get_preassemble_script_files(self):
    """Return files needed for preassembly"""
    files = super().get_preassemble_script_files()
    candidates = ("requirements3.txt", "Pipfile", "Pipfile.lock")
    # stage only the dependency files that actually exist
    for path in (self.binder_path(name) for name in candidates):
        if os.path.exists(path):
            files[path] = path
    return files

def get_preassemble_scripts(self):
    """scripts to run prior to staging the repo contents"""
    scripts = super().get_preassemble_scripts()
    # pinned pipenv install, used later to resolve Pipfile / Pipfile.lock
    pipenv_install = (
        "${NB_USER}",
        "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26",
    )
    return scripts + [pipenv_install]

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
"""
Expand Down Expand Up @@ -113,11 +131,6 @@ def get_assemble_scripts(self):
# my_package_example = {path=".", editable=true}
working_directory = self.binder_dir or "."

# install pipenv to install dependencies within Pipfile.lock or Pipfile
assemble_scripts.append(
("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
)

# NOTES:
# - Without prioritizing the PATH to KERNEL_PYTHON_PREFIX over
# NB_SERVER_PYTHON_PREFIX, 'pipenv' draws the wrong conclusion about
Expand Down
72 changes: 62 additions & 10 deletions repo2docker/buildpacks/python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os

from ..conda import CondaBuildPack
from ...utils import is_local_pip_requirement


class PythonBuildPack(CondaBuildPack):
Expand Down Expand Up @@ -34,24 +35,22 @@ def python_version(self):
self._python_version = py_version
return self._python_version

def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
def _get_pip_scripts(self):
"""Get pip install scripts
added to preassemble unless local references are found,
in which case this happens in assemble.
"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
scripts = []
if self.py2:
# using python 2 kernel,
# requirements3.txt allows installation in the notebook server env
nb_requirements_file = self.binder_path("requirements3.txt")
if os.path.exists(nb_requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
# want the $NB_PYTHON_PREFIX environment variable, not for
Expand All @@ -65,12 +64,65 @@ def get_assemble_scripts(self):
# install requirements.txt in the kernel env
requirements_file = self.binder_path("requirements.txt")
if os.path.exists(requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
'{} install --no-cache-dir -r "{}"'.format(pip, requirements_file),
)
)
return scripts

@property
def _should_preassemble_pip(self):
    """Peek in requirements files to determine if we can assemble from env files only.

    Any local reference (e.g. `-e .`) means the whole repo must be
    staged prior to installation.
    """
    # a root-level setup.py (no binder/ dir) always needs the full repo
    if os.path.exists("setup.py") and not os.path.exists("binder"):
        return False
    for name in ("requirements.txt", "requirements3.txt"):
        requirements_path = self.binder_path(name)
        if not os.path.exists(requirements_path):
            continue
        with open(requirements_path) as f:
            if any(is_local_pip_requirement(line) for line in f):
                return False
    # no local references found, so assembly from the subset is allowed
    return True

def get_preassemble_script_files(self):
    """Stage requirements files so pip installs can run before the full repo is added."""
    files = super().get_preassemble_script_files()
    for name in ("requirements.txt", "requirements3.txt"):
        path = self.binder_path(name)
        if not os.path.exists(path):
            continue
        files[path] = path
    return files

def get_preassemble_scripts(self):
    """Return scripts to run before adding the full repository"""
    scripts = super().get_preassemble_scripts()
    if not self._should_preassemble_pip:
        # pip install must wait for assemble when local refs are present
        return scripts
    return scripts + self._get_pip_scripts()

def get_assemble_scripts(self):
"""Return series of build steps that require the full repository"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
if not self._should_preassemble_pip:
assemble_scripts.extend(self._get_pip_scripts())

# setup.py exists *and* binder dir is not used
if not self.binder_dir and os.path.exists(setup_py):
Expand Down
26 changes: 26 additions & 0 deletions repo2docker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,29 @@ def normalize_doi(val):
(e.g. https://doi.org/10.1234/jshd123)"""
m = doi_regexp.match(val)
return m.group(2)


def is_local_pip_requirement(line):
    """Return whether a pip requirement (e.g. in requirements.txt file) references a local file

    Local references (`-r`, `-c`, `file://`, paths starting with `.`)
    break the isolation needed to install from a staged subset of the repo.
    """
    # trim comments and skip empty lines
    line = line.split("#", 1)[0].strip()
    if not line:
        return False
    if line.startswith(("-r", "-c", "--requirement", "--constraint")):
        # local -r or -c references (short or long form) break isolation
        return True
    # strip off a leading flag such as `-e` / `--editable`
    if line.startswith("-"):
        parts = line.split(None, 1)
        if len(parts) == 2:
            # space-separated form: `-e .`
            line = parts[1]
        else:
            # `=`-joined form: `--editable=.`; a bare flag yields ""
            # (previously this raised IndexError)
            line = parts[0].partition("=")[2]
        if not line:
            # a flag with no value references nothing local
            return False
    if "file://" in line:
        # file references break isolation
        return True
    if "://" in line:
        # handle git://../local/file
        path = line.split("://", 1)[1]
    else:
        path = line
    if path.startswith("."):
        # references a local file
        return True
    return False
17 changes: 17 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,20 @@ def test_normalize_doi():
assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"


@pytest.mark.parametrize(
    "line, expected",
    [
        # local references break isolation
        ("-r requirements.txt", True),
        ("-e .", True),
        ("file://subdir", True),
        ("file://./subdir", True),
        # remote references and plain package names are not local
        ("git://github.com/jupyter/repo2docker", False),
        ("git+https://github.com/jupyter/repo2docker", False),
        ("numpy", False),
        # commented-out lines are never local
        ("# -e .", False),
    ],
)
def test_local_pip_requirement(line, expected):
    """Each requirement line is classified as local (True) or not (False)."""
    assert utils.is_local_pip_requirement(line) == expected

0 comments on commit 231a2d8

Please sign in to comment.