From a570f43cfbac4d67bd0192348570b4435b46c660 Mon Sep 17 00:00:00 2001 From: Guo Quan Date: Tue, 4 Feb 2020 14:34:23 -0500 Subject: [PATCH 1/7] Add `ROOT_CONTAINER` to base-notebook Add another layer of abstraction to the base container, namely the root container `ROOT_CONTAINER`, so that the user can have the flexibility to build the whole series of images with `make` based on a different base image. --- base-notebook/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base-notebook/Dockerfile b/base-notebook/Dockerfile index 020cd41ac1..9e1edcfa2e 100644 --- a/base-notebook/Dockerfile +++ b/base-notebook/Dockerfile @@ -3,7 +3,8 @@ # Ubuntu 18.04 (bionic) # https://hub.docker.com/_/ubuntu/?tab=tags&name=bionic -ARG BASE_CONTAINER=ubuntu:bionic-20200112@sha256:bc025862c3e8ec4a8754ea4756e33da6c41cba38330d7e324abd25c8e0b93300 +ARG ROOT_CONTAINER=ubuntu:bionic-20200112@sha256:bc025862c3e8ec4a8754ea4756e33da6c41cba38330d7e324abd25c8e0b93300 +ARG BASE_CONTAINER=$ROOT_CONTAINER FROM $BASE_CONTAINER LABEL maintainer="Jupyter Project " From 8b3ce5cfa6ce70a86ebb09bc0e2104048f920569 Mon Sep 17 00:00:00 2001 From: romainx Date: Thu, 13 Feb 2020 11:21:54 +0100 Subject: [PATCH 2/7] Change spark mirror --- pyspark-notebook/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index f2dea592e3..9b243c0d1b 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get -y update && \ rm -rf /var/lib/apt/lists/* RUN cd /tmp && \ - wget -q http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ + wget -q https://www-us.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ echo 
"2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \ tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \ rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz From 7f7be5707c4058611fae8d1961cd98bf449ff29a Mon Sep 17 00:00:00 2001 From: romainx Date: Thu, 13 Feb 2020 12:00:41 +0100 Subject: [PATCH 3/7] spark mirror improvement --- pyspark-notebook/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index 9b243c0d1b..c3fc2d2ebb 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -15,8 +15,10 @@ RUN apt-get -y update && \ apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \ rm -rf /var/lib/apt/lists/* +# Using the preferred mirror to download the file RUN cd /tmp && \ - wget -q https://www-us.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \ + wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json=1 | \ + python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \ echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \ tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \ rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz From 4333c7cc142361f8504be15a0b2ed894fa4c5448 Mon Sep 17 00:00:00 2001 
From: romainx Date: Thu, 13 Feb 2020 13:02:37 +0100 Subject: [PATCH 4/7] fix as_json param --- pyspark-notebook/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index c3fc2d2ebb..ac9a41c21e 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -17,7 +17,7 @@ RUN apt-get -y update && \ # Using the preferred mirror to download the file RUN cd /tmp && \ - wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json=1 | \ + wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \ python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \ echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \ tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \ From dfd6d2ed03c0a1c2422b5eb57d660c0313183da1 Mon Sep 17 00:00:00 2001 From: romainx Date: Thu, 13 Feb 2020 16:57:00 +0100 Subject: [PATCH 5/7] Fix doc make image -> make build --- docs/contributing/features.md | 2 +- docs/contributing/packages.md | 2 +- docs/contributing/tests.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/contributing/features.md b/docs/contributing/features.md index 1dca0d2cf9..79511e5bba 100644 --- a/docs/contributing/features.md +++ b/docs/contributing/features.md @@ -26,7 +26,7 @@ If there's agreement that the feature belongs in one or more of the core stacks: 1. Implement the feature in a local clone of the `jupyter/docker-stacks` project. 2. 
Please build the image locally before submitting a pull request. Building the image locally shortens the debugging cycle by taking some load off [Travis CI](http://travis-ci.org/), which graciously provides free build services for open source projects like this one. If you use `make`, call: ``` -make image/somestack-notebook +make build/somestack-notebook ``` 3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. 4. Watch for Travis to report a build success or failure for your PR on GitHub. diff --git a/docs/contributing/packages.md b/docs/contributing/packages.md index 11828a8b5a..ef9e8b55cb 100644 --- a/docs/contributing/packages.md +++ b/docs/contributing/packages.md @@ -8,7 +8,7 @@ Please follow the process below to update a package version: 2. Adjust the version number for the package. We prefer to pin the major and minor version number of packages so as to minimize rebuild side-effects when users submit pull requests (PRs). For example, you'll find the Jupyter Notebook package, `notebook`, installed using conda with `notebook=5.4.*`. 3. Please build the image locally before submitting a pull request. Building the image locally shortens the debugging cycle by taking some load off [Travis CI](http://travis-ci.org/), which graciously provides free build services for open source projects like this one. If you use `make`, call: ``` -make image/somestack-notebook +make build/somestack-notebook ``` 4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. 5. Watch for Travis to report a build success or failure for your PR on GitHub. 
diff --git a/docs/contributing/tests.md b/docs/contributing/tests.md index c2ff4f358e..80d2786d8c 100644 --- a/docs/contributing/tests.md +++ b/docs/contributing/tests.md @@ -14,7 +14,7 @@ Please follow the process below to add new tests: 2. If your test should run against a single image, add your test code to one of the modules in `some-notebook/test/` or create a new module. 3. Build one or more images you intend to test and run the tests locally. If you use `make`, call: ``` -make image/somestack-notebook +make build/somestack-notebook make test/somestack-notebook ``` 4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. From 3aa61f94c204c07ad00b8e352a86d4258d5d55a5 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sat, 15 Feb 2020 18:53:43 -0500 Subject: [PATCH 6/7] Split SPARK_HOME definition from other env vars --- pyspark-notebook/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index f2dea592e3..c8dd3e0978 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -36,11 +36,11 @@ RUN apt-get -y update && \ rm -rf /var/lib/apt/lists/* # Spark and Mesos config -ENV SPARK_HOME=/usr/local/spark \ - PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \ +ENV SPARK_HOME=/usr/local/spark +ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \ MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \ SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \ - PATH=$PATH:/usr/local/spark/bin + PATH=$PATH:$SPARH_HOME/bin USER $NB_UID From 4a8b58a41b7b6f39fba9db855f337028c0f24aea Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sat, 15 Feb 2020 18:53:58 -0500 Subject: [PATCH 7/7] Test pyspark import --- pyspark-notebook/Dockerfile | 2 +-
pyspark-notebook/test/test_spark.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index c8dd3e0978..19b11d141d 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -40,7 +40,7 @@ ENV SPARK_HOME=/usr/local/spark ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \ MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \ SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \ - PATH=$PATH:$SPARH_HOME/bin + PATH=$PATH:$SPARK_HOME/bin USER $NB_UID diff --git a/pyspark-notebook/test/test_spark.py b/pyspark-notebook/test/test_spark.py index 2b5499aea1..a09d025680 100644 --- a/pyspark-notebook/test/test_spark.py +++ b/pyspark-notebook/test/test_spark.py @@ -16,4 +16,15 @@ def test_spark_shell(container): c.wait(timeout=30) logs = c.logs(stdout=True).decode('utf-8') LOGGER.debug(logs) - assert 'res0: Int = 2' in logs \ No newline at end of file + assert 'res0: Int = 2' in logs + +def test_pyspark(container): + """PySpark should be in the Python path""" + c = container.run( + tty=True, + command=['start.sh', 'python', '-c', '"import pyspark"'] + ) + rv = c.wait(timeout=30) + assert rv == 0 or rv["StatusCode"] == 0 + logs = c.logs(stdout=True).decode('utf-8') + LOGGER.debug(logs)