diff --git a/Dockerfile b/Dockerfile index 9087494..0bd8442 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,11 @@ +ARG MICROMAMBA_VERSION="1.5.6" +ARG ENVIRONMENT_FILE="env.yaml" +ARG NOTEBOOK_MODE # Stage 1 -FROM mambaorg/micromamba:1.5.6 as micromamba-patched +FROM mambaorg/micromamba:${MICROMAMBA_VERSION} AS micromamba-patched +ARG MICROMAMBA_VERSION +ARG ENVIRONMENT_FILE +ARG NOTEBOOK_MODE # Install security updates if base image is not yet patched # Inspired by https://pythonspeed.com/articles/security-updates-in-docker/ USER root @@ -8,13 +14,24 @@ RUN apt-get update && apt-get -y upgrade # cat /etc/apt/sources.list # WORKDIR /etc/apt/ USER $MAMBA_USER + # Stage 2 FROM micromamba-patched +ARG ENVIRONMENT_FILE +ARG NOTEBOOK_MODE USER $MAMBA_USER -COPY --chown=$MAMBA_USER:$MAMBA_USER env.yaml /tmp/env.yaml +# ENV ENVIRONMENT_FILE=${ENVIRONMENT_FILE} +ENV NOTEBOOK_MODE=${NOTEBOOK_MODE} +COPY --chown=$MAMBA_USER:$MAMBA_USER ${ENVIRONMENT_FILE} /tmp/env.yaml # Install packages +# The name of the environment will always be "base", irrespective of the YAML file +# This is due to the way micromamba-docker works RUN micromamba install -y -n base -f /tmp/env.yaml && \ micromamba clean --all --yes +# Install Kernels for Jupyter Notebook etc. +# TODO: Add the kernel installation; for now the build only reports the mode +RUN echo Notebook mode is "$NOTEBOOK_MODE" +RUN if [ -n "$NOTEBOOK_MODE" ] ; then echo DEBUG Notebook mode ; fi WORKDIR /usr/app/src COPY --chown=$MAMBA_USER:$MAMBA_USER src/ ./ ARG MAMBA_DOCKERFILE_ACTIVATE=1 diff --git a/README.md b/README.md index a5e7cb3..85c1c24 100644 --- a/README.md +++ b/README.md @@ -74,9 +74,9 @@ The script should run and report its progress, like so Now, you can start working on your own code. -1. In `build.sh` and `run_script.sh`, change the string `test_app` to a name for your application (e.g. `my_crawler`), like so +1. In `build.sh` **and** `run_script.sh`, change the string `test_app` to a name for your application (e.g. 
`my_crawler`), like so ```bash -IMAGE_NAME="my_crawler" +APPLICATION_ID="my_crawler" ``` 2. Edit the list of Python packages in `env.yaml` 3. You may want to change the name of the starter script `run_script.sh` to the name of your project (like `my_crawler.sh`). @@ -92,13 +92,15 @@ Your Python script will see the following directory structure: ``` - `/usr/app/src`: This is the source code and startup directory. -In the regular mode, this is the `src` folder inside the Docker container, created from the image. -It will not be updated until you re-build the image. -In **development mode** (see below for details), this is the `src` **in the directory that contains the `run_script.sh` script.** Symbolic links will be resolved. +In the regular mode, this is the `src` folder inside the Docker container, created from the image. + - **It will not be updated until you re-build the image.** + - In **development mode** (see below for details), this is the `src` **in the directory that contains the `run_script.sh` script.** Symbolic links will be resolved. - `/usr/app/data`: This is the host's current working directory, i.e. from where you start the `run_script.sh` script. -- `/usr/app/data/output`: This is a writeable directory for results, mapped to the `output` folder relative to the host's current working directory +- `/usr/app/data/output`: This is a writeable directory for results, mapped to the `output` folder within the current working directory on the host. -**Important:** The mapping of **directories from your local machine to these paths** inside the container **depends on from where you start the `run_script.sh` script.** The rationale is that the code can only see the data from the current (working) directory and only write to a dedicated `output` subdirectory therein. A malicious script can hence not modify or delete files in your working directory. 
But if you start the script from your user root directory `~/`, then the script can read all files from all subdirectories. +**Important:** +1. The mapping of **directories from your local machine to these paths** inside the container **depends on from where you start the `run_script.sh` script.** The rationale is that the code can only see the data from the current (working) directory and only write to a dedicated `output` subdirectory therein. +2. A malicious script or library can hence not modify or delete files in your working directory. **But if you start the script from your user root directory** `~/`, then **the script can read all files from all subdirectories.** In the development mode, the inner workings are a bit more complicated. Please see the comments in the `run_script.sh` file for details. @@ -107,11 +109,27 @@ In the development mode, the inner workings are a bit more complicated. Please s Before you can run your own code, you need to build a Docker image with `build.sh`: ```bash -Usage: ./build.sh [OPTIONS] +Usage: ./build.sh [OPTIONS] [.yaml] Option(s): - -d: development mode (create test_app_dev) - -f: force fresh build, ignoring caches (will update Python packages) + -d: development mode (create /test_app:dev) + -f: force fresh build, ignoring cached build stages (will e.g. update Python packages) + -n: Jupyter Notebook mode (create /notebook or /notebook:) +``` + +**Note:** The notebook mode is not yet fully functional. + +### Using another YAML environment file + +You can pass the name of another YAML environment file as CLI argument (the file extension `.yaml` is added automatically.). 
The name of the YAML file will be added to the Docker image tag, like so: + +```bash +# Use foo.yaml and create the image +# <user>/test_app:foo +./build.sh foo +# Use foo.yaml in development mode and create the image +# <user>/test_app:foo-dev +./build.sh -d foo ``` ### Development Image @@ -121,7 +139,7 @@ Go to your project directory and execute: ```bash ./build.sh -d ``` -This builds a development image, named `test_app_dev` (or whatever you chose for `test_app`; the suffix `_dev` is added automatically). +This builds a development image, named `<user>/test_app:dev` (or whatever you chose for `test_app`; the tag `:dev` is added automatically). ### Image for Production @@ -131,7 +149,7 @@ When done, you can build a production image with ./build.sh ``` -This builds an image for production, named `test_app` (or whatever you chose). +This builds an image for production, named `<user>/test_app` (or whatever you chose). The motivation for two images is that you will keep an image of your last working version available while you are developing (e.g. on feature branches). @@ -164,9 +182,10 @@ Usage: ./run_script.sh [OPTIONS] [APP_ARGS] Options: -d: (D)evelopment mode (mount local volume, as read-only) + -D: Expert (D)evelopment mode with WRITE ACCESS to src/ -i: (i)nteractive mode (keep terminal open and start with bash) -n: Allow outbound (N)etwork access to host network - --help: Show help + --help: Show help ``` All other arguments and options will be passed to your `main.py` application. @@ -183,7 +202,7 @@ In other words, **if you change your code, the new code will be executed** via ` ./run_script.sh -d ``` -Try to avoid using this mode from within the `src` directory. +**Warning:** Try to avoid using this mode from within the `src` directory, as malicious code could change your executable components. 
### Production Mode @@ -275,9 +294,10 @@ It is recommended that you create a simplified version of the `run_script.sh` sc ### Creating an Alias -If you want to be able to run the script just by a single command, like `my_script FooBar`, then add the following lines to your `.bash_profile` file, like so (`~/foo/bar/py4docker/` is the *absolute path* to the project in this example): +If you want to be able to run the script just by a single command, like `my_script FooBar`, then add the following lines to your `.bash_profile` file, like so: ```bash +# ~/foo/bar/py4docker/ is the absolute path to the project in this example alias my_script="bash ~/foo/bar/py4docker/run_script.sh" ``` @@ -361,12 +381,12 @@ More advanced settings are possible, e.g. adding a proxy or firewall inside the ## Limitations and Ideas for Improvement -- The code is currently maintained for Docker Desktop on Apple Silicon only. It may work on other platforms, but I have no time for testing at the moment. +- The code is currently maintained for Docker Desktop on Apple Silicon only. It may work on other platforms, but I have no time for testing at the moment. It seems to work on Debian. - Expand support for blocking and logging Internet access e.g. by domain or IP ranges is a priority at my side, but non-trivial. ## LICENSE -- tbd. Not yet decided; please ask! +- TODO: Not yet decided; please ask if urgent! - The [Docker default seccomp profile file](https://github.com/moby/moby/blob/master/profiles/seccomp/default.json) is being used under an [Apache 2.0 License](https://github.com/moby/moby/blob/master/LICENSE). 
## Related Projects diff --git a/build.sh b/build.sh index bca8d9c..ae9748e 100755 --- a/build.sh +++ b/build.sh @@ -12,33 +12,82 @@ # to # micromamba_local_user -IMAGE_NAME="test_app" +# Images should use user:name:digest in order to avoid collisions +USERNAME=$USER +APPLICATION_ID="test_app" +NOTEBOOK_ID="notebook" +BUILD_NOTEBOOK="" +DIGEST="" +ENVIRONMENT_FILE="env.yaml" usage () { printf 'Builds the Docker image from the Dockerfile\n' - printf 'Usage: %s [OPTIONS] \n\n' "$0" + printf 'Usage: %s [OPTIONS] [.yaml]\n\n' "$0" printf 'Option(s):\n' - printf " -d: development mode (create ${IMAGE_NAME}_dev)\n" - printf ' -f: force fresh build, ignoring caches (will update Python packages)\n' - + printf " -d: development mode (create $USER/$APPLICATION_ID:dev)\n" + printf ' -f: force fresh build, ignoring cached build stages (will e.g. update Python packages)\n' + printf " -n: Jupyter Notebook mode (create $USER/$NOTEBOOK_ID or $USER/$NOTEBOOK_ID:)\n" } -CACHED="" +if [[ $1 = "--help" ]]; then + usage + exit 0 +fi -while getopts ":df" opt; do +while getopts ":dfn" opt; do case ${opt} in d) - echo "Building development image as ${IMAGE_NAME}_dev" - IMAGE_NAME="${IMAGE_NAME}_dev" + if [ "$APPLICATION_ID" = "$NOTEBOOK_ID" ]; then + echo "ERROR: Incompatible options -d and -n. Aborting." + exit 1 + else + echo "INFO: Building DEVELOPMENT image" + DIGEST="dev" + fi ;; f) - echo "Force fresh build, ignoring caches (will update Python packages)" + echo "INFO: Force fresh build, ignoring cached build stages (will update Python packages and Debian packages)" PARAMETERS="--no-cache" ;; + n) + if [ "$DIGEST" = "dev" ]; then + echo "ERROR: Incompatible options -d and -n. Aborting." + exit 1 + else + echo "INFO: Building Jupyter NOTEBOOK image" + ENVIRONMENT_FILE="notebook.yaml" + APPLICATION_ID=$NOTEBOOK_ID + BUILD_NOTEBOOK="--build-arg NOTEBOOK_MODE=true" + fi + ;; ?) usage && exit 1 esac done -docker build $PARAMETERS --progress=plain --tag $IMAGE_NAME . 
+# Remove processed arguments +shift $((OPTIND-1)) + +if [ $# -eq 0 ]; then + echo "INFO: Environment file = $ENVIRONMENT_FILE" +else + ENVIRONMENT_FILE="$1.yaml" + # user/test_app:env-dev + # user/test_app:env + DIGEST="$1${DIGEST:+-$DIGEST}" + echo "INFO: Environment file = $ENVIRONMENT_FILE" +fi +# Test if environment file exists +if [ ! -f "$ENVIRONMENT_FILE" ]; then + echo "ERROR: Environment file $ENVIRONMENT_FILE not found. Aborting." + exit 1 +fi + +# Hint: ${DIGEST:+:$DIGEST} means add ":value" if variable DIGEST is set and non-empty, nothing otherwise +IMAGE_NAME="${USER}/${APPLICATION_ID}${DIGEST:+:$DIGEST}" +echo "INFO: Image tag = $IMAGE_NAME" +docker build $PARAMETERS \ + --build-arg="ENVIRONMENT_FILE=$ENVIRONMENT_FILE" \ +$BUILD_NOTEBOOK \ + --progress=plain --tag "$IMAGE_NAME" . diff --git a/notebook.yaml b/notebook.yaml new file mode 100644 index 0000000..22eeff0 --- /dev/null +++ b/notebook.yaml @@ -0,0 +1,16 @@ +# Using an environment name other than "base" is not recommended! +# Read https://github.com/mamba-org/micromamba-docker#multiple-environments +# if you must use a different environment name. 
+name: base +channels: + - conda-forge +dependencies: + - pip + - python>=3.9 + - typer + - requests + - httpx + - black[jupyter] +# PyPi modules +# - pip: +# - black[jupyter] diff --git a/run_script.sh b/run_script.sh index e867311..191b52d 100755 --- a/run_script.sh +++ b/run_script.sh @@ -2,17 +2,21 @@ # Shell script for starting Docker container with main.py # TODO: - Check other useful Docker CLI options +APPLICATION_ID="test_app" +# Change to name of alternate Python environment if needed +# Example: +# DIGEST="foo" for user/test_app:foo from foo.yaml +DIGEST="" SOURCE_MOUNT="" NETWORK="--net=none" PARAMETERS="" -IMAGE_NAME="test_app" -COMMAND="python ./main.py" # Change to COMMAND="python -u ./main.py" if you want print() statements # to be visible on stdout (default: logging only) +COMMAND="python ./main.py" usage () { - printf "Starts the ${IMAGE_NAME} application inside a Docker container\n" + printf "Starts the ${USER}/${APPLICATION_ID} application inside a Docker container\n" printf 'Usage: %s [OPTIONS] [APP_ARGS]\n\n' "$0" printf 'Options:\n' printf ' -d: (D)evelopment mode (mount local volume, as read-only)\n' @@ -34,6 +38,7 @@ echo "INFO: Working directory is $REAL_PWD" SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" SOURCE_DIR=${SCRIPT_DIR}/src echo "INFO: Source code path is $SOURCE_DIR" +# Mapping user id and user group for Linux bind mounts UID_HOST=$(id -u) GID_HOST=$(id -g) echo "INFO: Local User has UID = $UID_HOST, GID = $GID_HOST" @@ -47,45 +52,53 @@ fi while getopts ":dDin" opt; do case ${opt} in d) - if [ -n "$DEV" ]; then + if [ -n "$DIGEST_SUFFIX" ]; then echo "ERROR: Incompatible options -d and -D. Aborting." 
exit 1 else - DEV=TRUE - echo "Development mode enabled (running code from local file)" - echo "Using image ${IMAGE_NAME}_dev" + echo "INFO: Development mode enabled (running code from local file)" + DIGEST_SUFFIX="dev" +# echo "Using image ${IMAGE_NAME}_dev" SOURCE_MOUNT="--mount type=bind,source=$SCRIPT_DIR/src,target=/usr/app/src,readonly" - IMAGE_NAME="${IMAGE_NAME}_dev" +# IMAGE_NAME="${IMAGE_NAME}_dev" fi ;; D) - if [ -n "$DEV" ]; then + if [ -n "$DIGEST_SUFFIX" ]; then echo "ERROR: Incompatible options -d and -D. Aborting." exit 1 else - DEV=TRUE - echo "EXPERT development mode enabled (running code from local file)" - echo "Write access granted to $SCRIPT_DIR/src" + echo "INFO: EXPERT development mode enabled (running code from local file)" + echo "WARNING: Write access granted to $SCRIPT_DIR/src" read -n1 -p "Do you REALLY want to continue (Y/N)?" reply echo "" [ "$reply" != "Y" ] && [ "$reply" != "y" ] && echo "Aborting." && exit 1 - echo "Using image ${IMAGE_NAME}_dev" + DIGEST_SUFFIX="dev" + # echo "Using image ${IMAGE_NAME}_dev" SOURCE_MOUNT="--mount type=bind,source=$SCRIPT_DIR/src,target=/usr/app/src" - IMAGE_NAME="${IMAGE_NAME}_dev" + # IMAGE_NAME="${IMAGE_NAME}_dev" fi ;; i) - echo "Interactive mode enabled, keeping terminal open (use 'exit' to quit)" + echo "INFO: Interactive mode enabled, keeping terminal open (use 'exit' to quit)" PARAMETERS="-it" COMMAND="/bin/sh" ;; n) - echo "Outbound network ENABLED (Warning: The script can access the entire host network)" + echo "INFO: Outbound network ENABLED (Warning: The script can access the entire host network)" NETWORK="--net=host" ;; esac done -# Remove processed +if [ -n "$DIGEST" ]; then + # user/test_app:foo -> user/test_app:foo-dev + DIGEST="$DIGEST${DIGEST_SUFFIX:+-$DIGEST_SUFFIX}" +else + DIGEST=$DIGEST_SUFFIX +fi +IMAGE_NAME="${USER}/${APPLICATION_ID}${DIGEST:+:$DIGEST}" + +# Remove processed arguments shift $((OPTIND-1)) # In developer mode, we need to make sure that, @@ -117,7 +130,7 @@ case 
$SOURCE_DIR/ in FIX_OVERLAP_MOUNT="--mount type=volume,target=/usr/app/data/${SOURCE_DIR#$REAL_PWD/}" # We will FIRST mount the upper host directory, in this case the PWD MOUNT_AFTER_PWD=$SOURCE_MOUNT - echo DEBUG: Blocking overlap via $FIX_OVERLAP_MOUNT + echo INFO: Blocking overlapping paths via $FIX_OVERLAP_MOUNT ;; *) echo "INFO: src/ IS NOT in $REAL_PWD/*" ;; esac @@ -128,11 +141,12 @@ case $REAL_PWD/ in FIX_OVERLAP_MOUNT="--mount type=volume,target=/usr/app/src/${REAL_PWD#$SOURCE_DIR/}" # We will FIRST mount the upper host directory, in this case the source directory MOUNT_BEFORE_PWD=$SOURCE_MOUNT - echo DEBUG: Blocking overlap via $FIX_OVERLAP_MOUNT + echo INFO: Blocking overlapping paths via $FIX_OVERLAP_MOUNT ;; *) echo "INFO: $REAL_PWD IS NOT in src/*" ;; esac +echo INFO: Docker image is = $IMAGE_NAME # Create output directory if not exists mkdir -p output @@ -150,4 +164,4 @@ $NETWORK \ --read-only --tmpfs /tmp \ --cap-drop all \ --rm \ - $IMAGE_NAME $COMMAND "$@" + $IMAGE_NAME $COMMAND "$@" \ No newline at end of file