Skip to content

Commit

Permalink
Merge branch 'main' into adding-packages-2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
claytonparnell authored Jan 13, 2025
2 parents a115b18 + 1a090b2 commit 61626e7
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 29 deletions.
8 changes: 8 additions & 0 deletions template/v2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ ENV STUDIO_LOGGING_DIR="/var/log/studio/"
ENV EDITOR="nano"
ENV IMAGE_VERSION=$IMAGE_VERSION
ENV PINNED_MICROMAMBA_MINOR_VERSION="1.5.*"
ENV SAGEMAKER_RECOVERY_MODE_HOME=/tmp/sagemaker-recovery-mode-home

USER root
# Upgrade micromamba to the latest patch version in the pinned minor version range, if applicable
Expand Down Expand Up @@ -91,6 +92,13 @@ RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \
fi && \
# Enforce dependencies are all installed from conda-forge
micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
mkdir -p $SAGEMAKER_RECOVERY_MODE_HOME && \
chown $MAMBA_USER:$MAMBA_USER $SAGEMAKER_RECOVERY_MODE_HOME && \
JUPYTERLAB_VERSION=$(grep "^conda-forge::jupyterlab\[" /tmp/$ENV_IN_FILENAME) && \
SAGEMAKER_JUPYTERLAB_VERSION=$(grep "^conda-forge::sagemaker-jupyterlab-extension" /tmp/$ENV_IN_FILENAME) && \
echo "Installing in sagemaker-recovery-mode micromamba environment: $JUPYTERLAB_VERSION $SAGEMAKER_JUPYTERLAB_VERSION" && \
micromamba create -n sagemaker-recovery-mode && \
micromamba install -n sagemaker-recovery-mode -y $JUPYTERLAB_VERSION $SAGEMAKER_JUPYTERLAB_VERSION && \
micromamba clean --all --yes --force-pkgs-dirs && \
rm -rf /tmp/*.in && \
sudo ln -s $(which python3) /usr/bin/python && \
Expand Down
10 changes: 8 additions & 2 deletions template/v2/dirs/usr/local/bin/entrypoint-jupyter-server
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ set -e
# micromamba commands (e.g. using `micromamba activate` to activate environments)
eval "$(micromamba shell hook --shell=bash)"

# Activate conda environment 'base', where supervisord is installed
micromamba activate base
# Pick the micromamba environment to run from. SAGEMAKER_RECOVERY_MODE is only a
# flag (any non-empty value enables recovery mode); the directory to use as HOME
# is carried separately in SAGEMAKER_RECOVERY_MODE_HOME.
if [ -n "$SAGEMAKER_RECOVERY_MODE" ]; then
    # Point HOME at the dedicated recovery-mode home directory, not at the flag value
    export HOME="$SAGEMAKER_RECOVERY_MODE_HOME"
    # Activate conda environment `sagemaker-recovery-mode`
    micromamba activate sagemaker-recovery-mode
else
    # Activate conda environment 'base'
    micromamba activate base
fi

# Set up SAGEMAKER_APP_TYPE_LOWERCASE based on SAGEMAKER_APP_TYPE
export SAGEMAKER_APP_TYPE_LOWERCASE=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]')
Expand Down
10 changes: 8 additions & 2 deletions template/v2/dirs/usr/local/bin/start-jupyter-server
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@ set -e

eval "$(micromamba shell hook --shell=bash)"

# Activate conda environment 'base', which is the default environment for Cosmos
micromamba activate base
# Pick the micromamba environment to run from. SAGEMAKER_RECOVERY_MODE is only a
# flag (any non-empty value enables recovery mode); the directory to use as HOME
# is carried separately in SAGEMAKER_RECOVERY_MODE_HOME.
if [ -n "$SAGEMAKER_RECOVERY_MODE" ]; then
    # Point HOME at the dedicated recovery-mode home directory, not at the flag value
    export HOME="$SAGEMAKER_RECOVERY_MODE_HOME"
    # Activate conda environment `sagemaker-recovery-mode`
    micromamba activate sagemaker-recovery-mode
else
    # Activate conda environment 'base'
    micromamba activate base
fi

# Start Jupyter server in rtc mode for shared spaces
if [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ] && [ "$SAGEMAKER_SPACE_TYPE_LOWERCASE" == "shared" ]; then
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Builds on top of the image passed in through the SAGEMAKER_DISTRIBUTION_IMAGE
# build argument and starts it with recovery mode switched on.
ARG SAGEMAKER_DISTRIBUTION_IMAGE
FROM $SAGEMAKER_DISTRIBUTION_IMAGE

# NOTE(review): presumably turns on micromamba environment activation for
# build-time instructions in the base image — confirm against the base image docs.
ARG MAMBA_DOCKERFILE_ACTIVATE=1

# Any non-empty value makes the jupyter-server startup scripts activate the
# `sagemaker-recovery-mode` micromamba environment instead of `base`.
ENV SAGEMAKER_RECOVERY_MODE=true

# Run the Jupyter server entrypoint script as the container's main process.
ENTRYPOINT ["/usr/local/bin/entrypoint-jupyter-server"]
152 changes: 127 additions & 25 deletions test/test_dockerfile_based_harness.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import subprocess
import time
from typing import List

import docker
Expand All @@ -21,7 +22,10 @@
("autogluon.test.Dockerfile", ["autogluon"]),
("matplotlib.test.Dockerfile", ["matplotlib"]),
("matplotlib.test.Dockerfile", ["matplotlib-base"]),
("sagemaker-headless-execution-driver.test.Dockerfile", ["sagemaker-headless-execution-driver"]),
(
"sagemaker-headless-execution-driver.test.Dockerfile",
["sagemaker-headless-execution-driver"],
),
("scipy.test.Dockerfile", ["scipy"]),
("numpy.test.Dockerfile", ["numpy"]),
("boto3.test.Dockerfile", ["boto3"]),
Expand All @@ -38,22 +42,38 @@
("notebook.test.Dockerfile", ["notebook"]),
("glue-sessions.test.Dockerfile", ["aws-glue-sessions"]),
("altair.test.Dockerfile", ["altair"]),
("sagemaker-studio-analytics-extension.test.Dockerfile", ["sagemaker-studio-analytics-extension"]),
("amazon-codewhisperer-jupyterlab-ext.test.Dockerfile", ["amazon-codewhisperer-jupyterlab-ext"]),
(
"sagemaker-studio-analytics-extension.test.Dockerfile",
["sagemaker-studio-analytics-extension"],
),
(
"amazon-codewhisperer-jupyterlab-ext.test.Dockerfile",
["amazon-codewhisperer-jupyterlab-ext"],
),
("jupyterlab-git.test.Dockerfile", ["jupyterlab-git"]),
("amazon-sagemaker-sql-magic.test.Dockerfile", ["amazon-sagemaker-sql-magic"]),
("amazon_sagemaker_sql_editor.test.Dockerfile", ["amazon_sagemaker_sql_editor"]),
(
"amazon_sagemaker_sql_editor.test.Dockerfile",
["amazon_sagemaker_sql_editor"],
),
("serve.test.Dockerfile", ["langchain"]),
("langchain-aws.test.Dockerfile", ["langchain-aws"]),
("mlflow.test.Dockerfile", ["mlflow"]),
("jupyter-activity-monitor-extension.test.Dockerfile", ["jupyter-activity-monitor-extension"]),
(
"jupyter-activity-monitor-extension.test.Dockerfile",
["jupyter-activity-monitor-extension"],
),
("docker-cli.test.Dockerfile", ["docker-cli"]),
("s3fs.test.Dockerfile", ["s3fs"]),
("seaborn.test.Dockerfile", ["seaborn"]),
("sagemaker-recovery-mode.test.Dockerfile", ["sagemaker-jupyterlab-extension"]),
],
)
def test_dockerfiles_for_cpu(
dockerfile_path: str, required_packages: List[str], local_image_version: str, use_gpu: bool
dockerfile_path: str,
required_packages: List[str],
local_image_version: str,
use_gpu: bool,
):
_validate_docker_images(dockerfile_path, required_packages, local_image_version, use_gpu, "cpu")

Expand All @@ -66,7 +86,10 @@ def test_dockerfiles_for_cpu(
("autogluon.test.Dockerfile", ["autogluon"]),
("matplotlib.test.Dockerfile", ["matplotlib"]),
("matplotlib.test.Dockerfile", ["matplotlib-base"]),
("sagemaker-headless-execution-driver.test.Dockerfile", ["sagemaker-headless-execution-driver"]),
(
"sagemaker-headless-execution-driver.test.Dockerfile",
["sagemaker-headless-execution-driver"],
),
("scipy.test.Dockerfile", ["scipy"]),
("numpy.test.Dockerfile", ["numpy"]),
("boto3.test.Dockerfile", ["boto3"]),
Expand All @@ -83,24 +106,40 @@ def test_dockerfiles_for_cpu(
("notebook.test.Dockerfile", ["notebook"]),
("glue-sessions.test.Dockerfile", ["aws-glue-sessions"]),
("altair.test.Dockerfile", ["altair"]),
("sagemaker-studio-analytics-extension.test.Dockerfile", ["sagemaker-studio-analytics-extension"]),
("amazon-codewhisperer-jupyterlab-ext.test.Dockerfile", ["amazon-codewhisperer-jupyterlab-ext"]),
(
"sagemaker-studio-analytics-extension.test.Dockerfile",
["sagemaker-studio-analytics-extension"],
),
(
"amazon-codewhisperer-jupyterlab-ext.test.Dockerfile",
["amazon-codewhisperer-jupyterlab-ext"],
),
("jupyterlab-git.test.Dockerfile", ["jupyterlab-git"]),
("amazon-sagemaker-sql-magic.test.Dockerfile", ["amazon-sagemaker-sql-magic"]),
("amazon_sagemaker_sql_editor.test.Dockerfile", ["amazon_sagemaker_sql_editor"]),
(
"amazon_sagemaker_sql_editor.test.Dockerfile",
["amazon_sagemaker_sql_editor"],
),
("serve.test.Dockerfile", ["langchain"]),
("langchain-aws.test.Dockerfile", ["langchain-aws"]),
("mlflow.test.Dockerfile", ["mlflow"]),
("sagemaker-mlflow.test.Dockerfile", ["sagemaker-mlflow"]),
("jupyter-activity-monitor-extension.test.Dockerfile", ["jupyter-activity-monitor-extension"]),
(
"jupyter-activity-monitor-extension.test.Dockerfile",
["jupyter-activity-monitor-extension"],
),
("gpu-dependencies.test.Dockerfile", ["pytorch", "tensorflow"]),
("docker-cli.test.Dockerfile", ["docker-cli"]),
("s3fs.test.Dockerfile", ["s3fs"]),
("seaborn.test.Dockerfile", ["seaborn"]),
("sagemaker-recovery-mode.test.Dockerfile", ["sagemaker-jupyterlab-extension"]),
],
)
def test_dockerfiles_for_gpu(
dockerfile_path: str, required_packages: List[str], local_image_version: str, use_gpu: bool
dockerfile_path: str,
required_packages: List[str],
local_image_version: str,
use_gpu: bool,
):
_validate_docker_images(dockerfile_path, required_packages, local_image_version, use_gpu, "gpu")

Expand Down Expand Up @@ -142,7 +181,11 @@ def _check_required_package_constraints(target_version: Version, required_packag


def _validate_docker_images(
dockerfile_path: str, required_packages: List[str], local_image_version: str, use_gpu: bool, image_type: str
dockerfile_path: str,
required_packages: List[str],
local_image_version: str,
use_gpu: bool,
image_type: str,
):
target_version = get_semver(local_image_version)
test_artifacts_path = f"test/test_artifacts/v{str(target_version.major)}"
Expand Down Expand Up @@ -185,15 +228,74 @@ def _validate_docker_images(
# didn't execute successfully, the Docker client below will throw an error and fail the test.
# A consequence of this design decision is that any test assertions should go inside the container's entry-point.

container = _docker_client.containers.run(image=image.id, detach=True, stderr=True, device_requests=device_requests)
# Wait till container completes execution
result = container.wait()
exit_code = result["StatusCode"]
if exit_code != 0:
# Print STD out only during test failure
print(container.logs().decode("utf-8"))
# Remove the container.
container.remove(force=True)
_docker_client.images.remove(image=image.id, force=True)
# Fail the test if docker exit code is not zero
assert exit_code == 0
# Special handling for JupyterLab entrypoint testing: the recovery-mode image runs a
# long-lived server, so it is checked through its logs instead of its exit code.
# The parametrized test cases use the "sagemaker-" prefixed file name; the
# un-prefixed name is still accepted for backward compatibility.
if dockerfile_path in (
    "recovery-mode.test.Dockerfile",
    "sagemaker-recovery-mode.test.Dockerfile",
):
    try:
        _test_jupyterlab_entrypoint(image)
    finally:
        # Remove the test image, as the exit-code based branch below also does.
        _docker_client.images.remove(image=image.id, force=True)
else:
    container = _docker_client.containers.run(
        image=image.id, detach=True, stderr=True, device_requests=device_requests
    )
    # Wait till container completes execution
    result = container.wait()
    exit_code = result["StatusCode"]
    if exit_code != 0:
        # Print STD out only during test failure
        print(container.logs().decode("utf-8"))
    # Remove the container.
    container.remove(force=True)
    _docker_client.images.remove(image=image.id, force=True)
    # Fail the test if docker exit code is not zero
    assert exit_code == 0


def _test_jupyterlab_entrypoint(image):
    """
    Check that running the image's entrypoint brings up the JupyterLab process.

    The container is started detached and its logs are polled for the
    "jupyterlabserver entered RUNNING state" message. A container whose
    JupyterLab started successfully is expected to keep running, so it is
    always stopped and removed explicitly once the check is over.

    Args:
        image: The built image object; only its ``id`` attribute is used.
    Raises:
        Exception: Whatever the log wait raised, re-raised after the container
            logs have been printed.
    """
    print("Starting test to verify JupyterLab can be started...")
    # Launch in detached mode so the logs can be polled while the server runs
    run_options = {"image": image.id, "detach": True, "stderr": True}
    jupyter_container = _docker_client.containers.run(**run_options)
    try:
        # Block until the startup message shows up in the logs (or the wait times out)
        _wait_for_logs(
            jupyter_container,
            "jupyterlabserver entered RUNNING state",
            timeout=5,
        )
        print("Container logs indicate JupyterLab started successfully.")
    except Exception as failure:
        # Surface the failure reason and the full container output, then propagate
        print(f"Test failed: {failure}")
        captured_output = jupyter_container.logs().decode("utf-8")
        print("Container logs:", captured_output, sep="\n")
        raise
    finally:
        # Tear the container down regardless of the outcome
        jupyter_container.stop()
        jupyter_container.remove()
        print("Stopped and removed the container.")


def _wait_for_logs(container, search_string, timeout=5, poll_interval=1):
    """
    Wait for a specific string to appear in the container logs within a given timeout.

    The logs are always checked at least once, and one last time when the
    deadline is reached, so a message that arrives during the final poll
    interval is not missed.

    Args:
        container: The container to monitor; must expose ``logs()`` returning bytes.
        search_string: The string to search for in the logs.
        timeout: Maximum time to wait for the string to appear (in seconds).
        poll_interval: Time to wait between log checks (in seconds).
    Returns:
        True as soon as the string is found in the logs.
    Raises:
        TimeoutError: If the string does not appear in the logs within the timeout.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        # Undecodable bytes are replaced so a partial multi-byte sequence at the
        # end of the log stream cannot abort the wait.
        logs = container.logs().decode("utf-8", errors="replace")
        if search_string in logs:
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline; the next iteration is the final check.
        time.sleep(min(poll_interval, remaining))
    raise TimeoutError(f"Container did not log '{search_string}' within {timeout} seconds.")

0 comments on commit 61626e7

Please sign in to comment.