From cb2ef94840731f34a748860d984d4dca342d88be Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Mon, 6 May 2019 13:32:38 -0400 Subject: [PATCH 1/7] Disable IPython history when preprocessing IPython saves history in an SQLite DB. This can break in unfortunate ways when running multiple IPython kernels simultaneously. NBConvert will now default to disabling history saving in IPython kernels. --- nbconvert/preprocessors/execute.py | 16 +++++++ .../tests/files/Check History in Memory.ipynb | 46 +++++++++++++++++++ nbconvert/preprocessors/tests/test_execute.py | 1 + 3 files changed, 63 insertions(+) create mode 100644 nbconvert/preprocessors/tests/files/Check History in Memory.ipynb diff --git a/nbconvert/preprocessors/execute.py b/nbconvert/preprocessors/execute.py index 2e432819b..1df2edc77 100644 --- a/nbconvert/preprocessors/execute.py +++ b/nbconvert/preprocessors/execute.py @@ -220,6 +220,20 @@ class ExecutePreprocessor(Preprocessor): ) ).tag(config=True) + ipython_hist_file = Unicode( + default_value=':memory:', + help="""Path to file to use for SQLite history database for an IPython kernel. + + The specific value `:memory:` (including the colon + at both end but not the back ticks), avoids creating a history file. Otherwise, IPython + will create a history file for each kernel. + + When running kernels simultaneously (e.g. via multiprocessing) saving history a single + SQLite file can result in database errors, so using `:memory:` is recommended in non-interactive + contexts. + + """).tag(config=True) + kernel_manager_class = Type( config=True, help='The kernel manager class to use.' @@ -268,6 +282,8 @@ def start_new_kernel(self, **kwargs): 'kernelspec', {}).get('name', 'python') km = self.kernel_manager_class(kernel_name=self.kernel_name, config=self.config) + if km.ipykernel and self.ipython_hist_file: + self.extra_arguments += ['--HistoryManager.hist_file={}'.format(self.ipython_hist_file)] km.start_kernel(extra_arguments=self.extra_arguments, **kwargs) kc = km.client() diff --git a/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb b/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb new file mode 100644 index 000000000..6b0c58c91 --- /dev/null +++ b/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython import get_ipython" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "ip = get_ipython()\n", + "assert ip.history_manager.hist_file == ':memory:'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbconvert/preprocessors/tests/test_execute.py b/nbconvert/preprocessors/tests/test_execute.py index 163bcdea6..a7c711695 100644 --- a/nbconvert/preprocessors/tests/test_execute.py +++ b/nbconvert/preprocessors/tests/test_execute.py @@ -238,6 +238,7 @@ def assert_notebooks_equal(expected, actual): ("Unicode.ipynb", dict(kernel_name="python")), ("UnicodePy3.ipynb", dict(kernel_name="python")), ("update-display-id.ipynb", dict(kernel_name="python")), + ("Check History in Memory.ipynb", dict(kernel_name="python")), ] ) def test_run_all_notebooks(input_name, opts): From e348ee153f82d060bf61b50b5fd0873ca829546f Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Tue, 7 May 2019 09:43:26 -0400 Subject: [PATCH 2/7] Test for parallel execution of notebooks --- .../tests/files/Parallel Execute.ipynb | 84 +++++++++++++++++++ nbconvert/preprocessors/tests/test_execute.py | 69 +++++++++++++-- 2 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 nbconvert/preprocessors/tests/files/Parallel Execute.ipynb diff --git a/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb new file mode 100644 index 000000000..1e49a6d4f --- /dev/null +++ b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import os.path\n", + "import tempfile\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "other_notebook = {'A':'B', 'B':'A'}[this_notebook]\n", + "directory = os.environ['NBEXECUTE_TEST_PARALLEL_TMPDIR']\n", + "with open(os.path.join(directory, 'test_file_{}.txt'.format(this_notebook)), 'w') as f:\n", + " f.write('Hello from {}'.format(this_notebook))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "timeout = 5\n", + "end = start + timeout\n", + "target_file = os.path.join(directory, 'test_file_{}.txt'.format(other_notebook))\n", + "while time.time() < end:\n", + " time.sleep(0.1)\n", + " if os.path.exists(target_file):\n", + " with open(target_file, 'r') as f:\n", + " text = f.read()\n", + " if text == 'Hello from {}'.format(other_notebook):\n", + " break\n", + "else:\n", + " assert False, \"Timed out – didn't get a message from {}\".format(other_notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbconvert/preprocessors/tests/test_execute.py b/nbconvert/preprocessors/tests/test_execute.py index a7c711695..3d3716382 100644 --- a/nbconvert/preprocessors/tests/test_execute.py +++ b/nbconvert/preprocessors/tests/test_execute.py @@ -12,7 +12,10 @@ import glob import io import os +import logging import re +import threading +import multiprocessing as mp import nbformat import sys @@ -69,7 +72,7 @@ def build_preprocessor(opts): return preprocessor -def run_notebook(filename, opts, resources): +def run_notebook(filename, opts, resources, preprocess_notebook=None): """Loads and runs a notebook, returning both the version prior to running it and the version after running it. @@ -77,6 +80,9 @@ def run_notebook(filename, opts, resources): with io.open(filename) as f: input_nb = nbformat.read(f, 4) + if preprocess_notebook: + input_nb = preprocess_notebook(input_nb) + preprocessor = build_preprocessor(opts) cleaned_input_nb = copy.deepcopy(input_nb) for cell in cleaned_input_nb.cells: @@ -218,6 +224,12 @@ def assert_notebooks_equal(expected, actual): actual_execution_count = actual_cell.get('execution_count', None) assert expected_execution_count == actual_execution_count +def notebook_resources(): + res = ResourcesDict() + res['metadata'] = ResourcesDict() + res['metadata']['path'] = os.path.join(current_dir, 'files') + return res + @pytest.mark.parametrize( ["input_name", "opts"], @@ -244,13 +256,60 @@ def assert_notebooks_equal(expected, actual): def test_run_all_notebooks(input_name, opts): """Runs a series of test notebooks and compares them to their actual output""" input_file = os.path.join(current_dir, 'files', input_name) - res = ResourcesDict() - res['metadata'] = ResourcesDict() - res['metadata']['path'] = os.path.join(current_dir, 'files') - input_nb, output_nb = run_notebook(input_file, opts, res) + input_nb, output_nb = run_notebook(input_file, opts, notebook_resources()) assert_notebooks_equal(input_nb, output_nb) +def label_parallel_notebook(nb, label): + """Insert a cell in a notebook which sets the variable `this_notebook` to the string `label`. + + Used for parallel testing to label two notebooks which are run simultaneously. + """ + label_cell = nbformat.NotebookNode( + { + "cell_type": "code", + "execution_count": None, + "metadata": {}, + "outputs": [], + "source": "this_notebook = '{}'".format(label), + } + ) + + nb.cells.insert(1, label_cell) + return nb + + +def test_parallel_notebooks(capfd, tmpdir): + """Two notebooks should be able to be run simultaneously without problems. + + The two notebooks spawned here use the filesystem to check that the other notebook + wrote to the filesystem.""" + + opts = dict(kernel_name="python") + input_name = "Parallel Execute.ipynb" + input_file = os.path.join(current_dir, "files", input_name) + res = notebook_resources() + + with modified_env({"NBEXECUTE_TEST_PARALLEL_TMPDIR": str(tmpdir)}): + threads = [ + threading.Thread( + target=run_notebook, + args=( + input_file, + opts, + res, + functools.partial(label_parallel_notebook, label=label), + ), + ) + for label in ("A", "B") + ] + [t.start() for t in threads] + [t.join(timeout=2) for t in threads] + + captured = capfd.readouterr() + assert captured.err == "" + + class TestExecute(PreprocessorTestsBase): """Contains test functions for execute.py""" maxDiff = None From e296eb5f93cc1ecf0f1df949251aa0cdb140e4be Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Wed, 8 May 2019 10:12:14 -0400 Subject: [PATCH 3/7] Fixup comments on parallel notebook test --- .../tests/files/Parallel Execute.ipynb | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb index 1e49a6d4f..1877db6fd 100644 --- a/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb +++ b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb @@ -1,5 +1,17 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ensure notebooks can execute in parallel\n", + "\n", + "This notebook uses a file system based \"lock\" to assert that two instances of the notebook kernel will run in parallel. Each instance writes to a file in a temporary directory, and then tries to read the other file from\n", + "the temporary directory, so that running them in sequence will fail, but running them in parallel will succed.\n", + "\n", + "Two notebooks are launched, each with an injected cell which sets the `this_notebook` variable. One notebook is set to `this_notebook = 'A'` and the other `this_notebook = 'B'`." + ] + }, { "cell_type": "code", "execution_count": null, @@ -18,6 +30,7 @@ "metadata": {}, "outputs": [], "source": [ + "# the variable this_notebook is injectected in a cell above by the test framework.\n", "other_notebook = {'A':'B', 'B':'A'}[this_notebook]\n", "directory = os.environ['NBEXECUTE_TEST_PARALLEL_TMPDIR']\n", "with open(os.path.join(directory, 'test_file_{}.txt'.format(this_notebook)), 'w') as f:\n", @@ -44,20 +57,6 @@ "else:\n", " assert False, \"Timed out – didn't get a message from {}\".format(other_notebook)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 53653cb438a9979e9ece32f70c8befc3de26dfd3 Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Wed, 8 May 2019 17:56:57 -0700 Subject: [PATCH 4/7] Remove kernel specification --- .../tests/files/Parallel Execute.ipynb | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb index 1877db6fd..d40545c30 100644 --- a/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb +++ b/nbconvert/preprocessors/tests/files/Parallel Execute.ipynb @@ -59,25 +59,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 2 } From fb3780bbf96229e3ed7cc7f1d0771ad7b85fbe17 Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Wed, 8 May 2019 17:59:16 -0700 Subject: [PATCH 5/7] Remove kernel spec from additional notebook --- .../tests/files/Check History in Memory.ipynb | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb b/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb index 6b0c58c91..7dc65a1a3 100644 --- a/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb +++ b/nbconvert/preprocessors/tests/files/Check History in Memory.ipynb @@ -22,25 +22,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 2 } From 7b4a67711fc1df56d6aeb31a3e75cd63a466968a Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Mon, 13 May 2019 10:53:22 -0700 Subject: [PATCH 6/7] Review fixes --- nbconvert/preprocessors/tests/test_execute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbconvert/preprocessors/tests/test_execute.py b/nbconvert/preprocessors/tests/test_execute.py index 3d3716382..c9aa20b4f 100644 --- a/nbconvert/preprocessors/tests/test_execute.py +++ b/nbconvert/preprocessors/tests/test_execute.py @@ -12,7 +12,6 @@ import glob import io import os -import logging import re import threading import multiprocessing as mp @@ -225,6 +224,7 @@ def assert_notebooks_equal(expected, actual): assert expected_execution_count == actual_execution_count def notebook_resources(): + """Prepare a notebook resources dictionary for executing test notebooks in the `files` folder.""" res = ResourcesDict() res['metadata'] = ResourcesDict() res['metadata']['path'] = os.path.join(current_dir, 'files') From 0c7d7ba3b6b05d14cade2b76525a525e559f7fcc Mon Sep 17 00:00:00 2001 From: Alex Rudy Date: Mon, 13 May 2019 16:16:13 -0700 Subject: [PATCH 7/7] Use nbformat to set up notebook cells --- nbconvert/preprocessors/tests/test_execute.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/nbconvert/preprocessors/tests/test_execute.py b/nbconvert/preprocessors/tests/test_execute.py index c9aa20b4f..0229aa52e 100644 --- a/nbconvert/preprocessors/tests/test_execute.py +++ b/nbconvert/preprocessors/tests/test_execute.py @@ -265,16 +265,7 @@ def label_parallel_notebook(nb, label): Used for parallel testing to label two notebooks which are run simultaneously. """ - label_cell = nbformat.NotebookNode( - { - "cell_type": "code", - "execution_count": None, - "metadata": {}, - "outputs": [], - "source": "this_notebook = '{}'".format(label), - } - ) - + label_cell = nbformat.v4.new_code_cell(source="this_notebook = '{}'".format(label)) nb.cells.insert(1, label_cell) return nb