From 2fd542bac1d05d3a675784826c144f98d126e177 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 24 Mar 2022 16:18:40 +0100 Subject: [PATCH 1/3] Add notebook template --- .../notebooks/notebook_template.ipynb | 482 ++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 protopipe/benchmarks/notebooks/notebook_template.ipynb diff --git a/protopipe/benchmarks/notebooks/notebook_template.ipynb b/protopipe/benchmarks/notebooks/notebook_template.ipynb new file mode 100644 index 00000000..01276263 --- /dev/null +++ b/protopipe/benchmarks/notebooks/notebook_template.ipynb @@ -0,0 +1,482 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "22bd313b-f2f8-4b13-b3e2-5e93e9ad01d3", + "metadata": {}, + "source": [ + "
\n", + "IMPORTANT: Colored boxes are for editing instructions and must be removed before adding the new notebook to the benchmarking suite.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "ac72eed4-735b-4c45-926a-697778db20d2", + "metadata": {}, + "source": [ + "# Title - Analysis stage - Data Level" + ] + }, + { + "cell_type": "markdown", + "id": "0b1770f9-3075-48cc-971d-99786ac6c88b", + "metadata": {}, + "source": [ + "
\n", + "
  • Title example: Shower geometry reconstruction - TRAINING - DL2a
  • \n", + "
  • path/filename: protopipe/benchmarks/notebooks/analysis-stage/benchmarks_DLXy_Title.ipynb
  • \n", + "
  • Data level: should be short and easy (e.g. image cleaning, direction reconstruction, etc..)
  • \n", + "
  • Analysis stage: will in general depend from the analysis workflow, but some are already in use as TRAINING, MODELS, DL2, DL3
  • \n", + "
  • Datasample names: should reflect particle and analysis stage as an integer, e.g gamma1 or proton2
  • \n", + "
  • Cell tags: in principle these notebooks are made to show results, so make sure that input cells are removed or hidden and that the only headers that will appear under HTML are those with the name of the benchmarks (the results repository will perform this automatically) - go here for a list of cell tags to use
  • \n", + " \n", + "
    " + ] + }, + { + "cell_type": "markdown", + "id": "9e62c814-4ca4-46b7-afd6-87d3c3186034", + "metadata": {}, + "source": [ + "**Recommended datasample(s):** `datasample name` (dataset used to XXX)\n", + "\n", + "**Data level(s):** DLXy (user-friendly short description) + ...\n", + "\n", + "**Description:**\n", + "\n", + "This notebook contains plots and benchmarks proposals from the _protopipe_ pipeline related to ...\n", + "\n", + "**Requirements and steps to reproduce:**\n", + "\n", + "- get a XXX data generated using `protopipe-XXX` (or ctapipe-process + XXX.json)\n", + "- execute the notebook with `protopipe-BENCHMARK`,\n", + "\n", + "`protopipe-BENCHMARK launch --config_file configs/benchmarks.yaml -n TRAINING/benchmarks_DLXy_title`\n", + "\n", + "To obtain the list of all available parameters add ``--help-notebook``.\n", + "\n", + "**Development and testing:**\n", + "\n", + "As with any other part of _protopipe_ and being part of the official repository, this notebook can be further developed by any interested contributor. \n", + "The execution of this notebook is not currently automatic, it must be done locally by the user _before_ pushing a pull-request. \n", + "Please, **strip the output before pushing**." + ] + }, + { + "cell_type": "markdown", + "id": "afa7e683-68ed-4a1a-9b8c-800f0ed68787", + "metadata": {}, + "source": [ + "## Table of contents\n", + "- [Benchmark name 1](#Benchmark-name-1)\n", + " - [Benchmark name 1.1](#Benchmark-name-1.1)" + ] + }, + { + "cell_type": "markdown", + "id": "b2e6e63a-456d-48c5-a7b4-ac2811606b83", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "markdown", + "id": "ee08c458-3bb0-4c20-8086-9de8c06ded59", + "metadata": {}, + "source": [ + "
    \n", + "
  • Import only what is strictly necessary to run the notebook
  • \n", + "
  • Privilege Python's standard library
  • \n", + "
  • Optional imports (e.g. seaborn and uproot) should be performed after the injection of the parameters from papermill (see later)

  • \n", + "\n", + " The following cell gives an example of what might be useful.\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1466b9c2-6d6a-4a8f-9532-6a4a225a5ee0", + "metadata": {}, + "outputs": [], + "source": [ + "# Python Standard Library\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Data handling\n", + "import tables\n", + "import pandas\n", + "import numpy as np\n", + "\n", + "# Plotting\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import LogNorm\n", + "from matplotlib.pyplot import rc\n", + "import matplotlib.style as style\n", + "from cycler import cycler\n", + "\n", + "# protopipe I/O and benchmarking API\n", + "from protopipe.pipeline.io import get_camera_names, read_protopipe_TRAINING_per_tel_type\n", + "from protopipe.benchmarks.utils import string_to_boolean, get_fig_size" + ] + }, + { + "cell_type": "markdown", + "id": "d41ffaf6-06d6-4e7d-8f07-5f920390b627", + "metadata": {}, + "source": [ + "## Input data" + ] + }, + { + "cell_type": "markdown", + "id": "c6b24e34-2a3e-4ec4-b434-cb877e305ad6", + "metadata": {}, + "source": [ + "
    \n", + "
  • The next cell contains an example list of parameters
  • \n", + "
  • also, more parameters could be injected by the user at runtime via the CLI
  • \n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ab10057-dab0-4b65-9b11-c2c956ea22e5", + "metadata": {}, + "outputs": [], + "source": [ + "# Parametrized cell\n", + "\n", + "# General I/O options\n", + "analyses_directory = None\n", + "output_directory = Path.cwd() # default output directory for plots\n", + "analysis_name = None\n", + "input_filename = None # Name of the file produced with protopipe\n", + "\n", + "# CTAMARS or other ROOT-based data to load (if none, just remove)\n", + "load_CTAMARS = True # Enable to compare the CTAN analysis done with CTAMARS (Release 2019)\n", + "indir_CTAMARS = None # Path to CTAMARS data (if load_CTAMARS is True)\n", + "input_file_name_CTAMARS = \"CTA_check_dl2_4L15M.root\" # Name of the CTAMARS reference file to use (if load_CTAMARS is True)\n", + "input_simtel_filepath = None # simtel file used to plot telescope positions\n", + "\n", + "# Comparison between protopipe analyses\n", + "export_data = True # If True export data in CSV format\n", + "superimpose = False # If True superimpose results from 'analysis_name_2' data files (requires 'export_data'=True)\n", + "analysis_name_2 = None\n", + "\n", + "# Plotting options (see benchmarks.yaml)\n", + "export_plots = False # if True, save plots in a format (PNG by default) into a \"plots\" folder\n", + "plots_format = \"png\"\n", + "plots_scale = None\n", + "use_seaborn = False # If True import seaborn and apply global settings from config file" + ] + }, + { + "cell_type": "markdown", + "id": "0fe80b64-2576-4b01-b1ea-048e1d4fd26b", + "metadata": {}, + "source": [ + "
    \n", + "
  • At runtime, papermill will inject the final values of all parameters after this cell.
  • \n", + "
  • The cell after that should deal with the conversion of papermill format conversions (it converts CLI injected parameters to strings) and with the optional imports.
  • \n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "501aaec5-f214-4683-a238-d19e013b7803", + "metadata": {}, + "outputs": [], + "source": [ + "# Handle boolean variables (papermill reads them as strings)\n", + "[load_CTAMARS,\n", + " load_EventDisplay,\n", + " use_seaborn,\n", + " export_data,\n", + " export_plots,\n", + " superimpose] = string_to_boolean([load_CTAMARS,\n", + " load_EventDisplay,\n", + " use_seaborn,\n", + " export_data,\n", + " export_plots,\n", + " superimpose])\n", + "if use_seaborn:\n", + " try:\n", + " import seaborn as sns\n", + " except ImportError:\n", + " sys.exit(\"ERROR: seaborn was enabled, but it doesn't seem to be installed in this environemnt.\")\n", + "\n", + "if load_CTAMARS:\n", + " try:\n", + " import uproot\n", + " except ImportError:\n", + " sys.exit(\"ERROR: ROOT-based data was requested, but uproot doesn't seem to be installed in this environemnt.\")" + ] + }, + { + "cell_type": "markdown", + "id": "d85a2b62-e8fc-4593-a903-69ef33c86bed", + "metadata": {}, + "source": [ + "
    \n", + "
  • The next cell makes sure the required filenames are defined by prioritizing benchmarks.yaml, then the notebook itself, otherwise an error is raised.
  • \n", + "
  • The data input process could be refactored under protopipe.benchmarks.operations.
  • \n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef4c29c0-834b-43af-a7cc-b518792f6e30", + "metadata": {}, + "outputs": [], + "source": [ + "if input_filename is None:\n", + " try:\n", + " input_filename = input_filenames[\"TRAINING_energy_gamma\"]\n", + " except (NameError, KeyError):\n", + " raise ValueError(\"The name of the input file is undefined: please use benchmarks.yaml or define it using the CLI.\")\n", + "\n", + "if input_simtel_filepath is None:\n", + " try:\n", + " input_simtel_filepath = Path(input_filenames[\"simtel\"])\n", + " except (NameError, KeyError, TypeError):\n", + " input_simtel_filepath = None # a warning is print later\n", + " finally:\n", + " if (input_filenames[\"simtel\"]==\"\"):\n", + " input_simtel_filepath = None\n", + "else:\n", + " input_simtel_filepath = Path(input_simtel_filepath)\n", + "\n", + "# only if all required datafiles are defined, then start the data input process\n", + "input_directory = Path(analyses_directory) / analysis_name / Path(\"data/TRAINING/for_energy_estimation/gamma\")\n", + "cameras = get_camera_names(input_directory, input_filename)\n", + "data = read_protopipe_TRAINING_per_tel_type(input_directory, input_filename, cameras)" + ] + }, + { + "cell_type": "markdown", + "id": "0ca15c0f-cc04-42bf-90a1-9899c7ebc05d", + "metadata": {}, + "source": [ + "
    \n", + "
  • The next 2 cells setup the output folders for plots and data (if exported) and the final plotting settings
  • \n", + "
  • The code contained in the plotting setting cell could be refactored under protopipe.benchmarks.plots.
  • \n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "348f0e9d-2ba1-4ab3-b963-1fa53c190511", + "metadata": {}, + "outputs": [], + "source": [ + "# First we check if a \"plots\" folder exists already. \n", + "# If not, we create it.\n", + "if export_plots:\n", + " plots_folder = Path(output_directory) / \"plots\"\n", + " plots_folder.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Next we check if a \"data\" folder exists already. \n", + "# If not, we create it.\n", + "if export_data:\n", + " data_folder = Path(output_directory) / \"data\"\n", + " data_folder.mkdir(parents=True, exist_ok=True)\n", + "\n", + "if superimpose:\n", + " input_directory_data_2 = Path(analyses_directory) / analysis_name_2/ \"benchmarks_results/TRAINING\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a66db477-61f4-4e09-bf44-c13247148b82", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot aesthetics settings\n", + "\n", + "scale = matplotlib_settings[\"scale\"] if plots_scale is None else float(plots_scale)\n", + "\n", + "style.use(matplotlib_settings[\"style\"])\n", + "cmap = matplotlib_settings[\"cmap\"]\n", + "\n", + "if matplotlib_settings[\"style\"] == \"seaborn-colorblind\":\n", + " \n", + " colors_order = ['#0072B2', '#D55E00', '#F0E442', '#009E73', '#CC79A7', '#56B4E9']\n", + " rc('axes', prop_cycle=cycler(color=colors_order))\n", + "\n", + "if use_seaborn:\n", + " import seaborn as sns\n", + "\n", + " sns.set_theme(context=seaborn_settings[\"theme\"][\"context\"] if \"context\" in seaborn_settings[\"theme\"] else \"talk\",\n", + " style=seaborn_settings[\"theme\"][\"style\"] if \"style\" in seaborn_settings[\"theme\"] else \"whitegrid\",\n", + " palette=seaborn_settings[\"theme\"][\"palette\"] if \"palette\" in seaborn_settings[\"theme\"] else None,\n", + " font=seaborn_settings[\"theme\"][\"font\"] if \"font\" in seaborn_settings[\"theme\"] else \"Fira Sans\",\n", + " font_scale=seaborn_settings[\"theme\"][\"font_scale\"] if \"font_scale\" in seaborn_settings[\"theme\"] else 1.0,\n", + " color_codes=seaborn_settings[\"theme\"][\"color_codes\"] if \"color_codes\" in seaborn_settings[\"theme\"] else True\n", + " )\n", + " \n", + " sns.set_style(seaborn_settings[\"theme\"][\"style\"], rc=seaborn_settings[\"rc_style\"])\n", + " sns.set_context(seaborn_settings[\"theme\"][\"context\"],\n", + " font_scale=seaborn_settings[\"theme\"][\"font_scale\"] if \"font_scale\" in seaborn_settings[\"theme\"] else 1.0)" + ] + }, + { + "cell_type": "markdown", + "id": "ce17ed5c-94c4-42ff-9c09-6b216bdca780", + "metadata": {}, + "source": [ + "## Prepare data" + ] + }, + { + "cell_type": "markdown", + "id": "cfb80b7c-4d5d-4f0c-883d-397298f0f58a", + "metadata": {}, + "source": [ + "
    \n", + "
  • Here you should prepare the data in the format that you need to build all the benchmarks of the notebook
  • \n", + "
  • This part can be more or less long depending on how much refactored code is available (either from protopipe or ctapipe
  • \n", + "
  • This is also the place where to define (or overwrite) reconstructed and true energy bins (would be good to define this first in benchmarks.yaml like for input data files so all benchmarks will share the same energy settings by default
  • \n", + "
    " + ] + }, + { + "cell_type": "markdown", + "id": "50635a9f-ec35-4bd9-b190-f9fc2e4192ba", + "metadata": {}, + "source": [ + "## Benchmark name 1\n", + "[back to top](#Table-of-contents)" + ] + }, + { + "cell_type": "markdown", + "id": "cfbc1ccf-dd3c-4759-8f06-3859a11b7f23", + "metadata": {}, + "source": [ + "- this benchmark shows ...\n", + "- it is expected that ..." + ] + }, + { + "cell_type": "markdown", + "id": "0da0ff4c-4376-49de-96a4-2a0060476821", + "metadata": {}, + "source": [ + "
    \n", + "
  • all benchmarks should be done in the same way
  • \n", + "
      \n", + "
    1. define figure
    2. \n", + "
    3. define data to use
    4. \n", + "
    5. make plot
    6. \n", + "
    7. (optionally) export data
    8. \n", + "
    9. (optionally) save plot
    10. \n", + "
    \n", + "
  • figures should be always initialized with protopipe.benchmarks.utils.get_fig_size
  • \n", + "
  • code to export data and plots should be refactored (improved) and called from relevant protopipe.benchmarks modules (next cell shows an examples for a 1D plot)
  • \n", + "
  • an optional cell to define specific variables for the specific benchmark can be added just before the actual plot cell (though, such an operation on data should be refactored under protopipe.benchmarks.operations if long, otherwise it can be done directly inside the benchmarking cell
  • \n", + " \n", + "
    " + ] + }, + { + "cell_type": "markdown", + "id": "9f6524e7-44f8-457a-b152-e6344c8691d5", + "metadata": {}, + "source": [ + "```python\n", + "\n", + "benchmark_name = \"benchmark example\"\n", + "fig = plt.figure(figsize=get_fig_size(ratio=4/3., scale=scale))\n", + "\n", + "# Get data\n", + "\n", + "X = ...\n", + "Y = ...\n", + "\n", + "# (Optionally) Export data used for plot\n", + "# This can be refactored as explained above\n", + "if export_data:\n", + " data_to_write = np.array([X,Y])\n", + " np.savetxt(data_folder / f\"{benchmark_name}_protopipe_{analysis_name}.csv\",\n", + " data_to_write.T,\n", + " delimiter=',',\n", + " header=\"X quantity [unit name], Y quantity [unit name]\")\n", + "\n", + "# Make plot\n", + "\n", + "options = {}\n", + "plotting_function(X, Y, **options)\n", + "\n", + "# (Optionally) Superimpose same benchmark from a different analysis\n", + "# This can be refactored as explained above\n", + "if superimpose:\n", + " data_2 = np.genfromtxt(\n", + " input_directory_data_2 / f\"data/{benchmark_name}_protopipe_{analysis_name_2}.csv\",\n", + " delimiter=',',\n", + " filling_values=True).T\n", + " plt.plot(data_2[0], data_2[1], '-.', label = f\"protopipe {analysis_name_2}\")\n", + "\n", + "# all other plot options should be added at the very end\n", + "plt.xlabel()\n", + "plt.ylabel()\n", + "plt.legend()\n", + "plt.grid() # take care when using this with seaborn enabled\n", + "\n", + "# (Optionally) Export plot\n", + "if export_plots:\n", + " plt.savefig(plots_folder / f\"{benchmark_name}_protopipe_{analysis_name}.{plots_format}\")\n", + " \n", + "None # to remove clutter by matplotlib objects\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "8619d33f-8c37-44a9-8f79-96328ce978f7", + "metadata": {}, + "source": [ + "## Benchmark name 1.1\n", + "[back to top](#Table-of-contents)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1fcd092-758c-44b9-b65a-2b2a95078a1a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c6b13b2e14241ea8b9a9f678f61f6a0378101162 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Fri, 25 Mar 2022 10:18:43 +0100 Subject: [PATCH 2/3] Fix docstring --- protopipe/benchmarks/operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protopipe/benchmarks/operations.py b/protopipe/benchmarks/operations.py index cfe9958b..06c3a5c1 100644 --- a/protopipe/benchmarks/operations.py +++ b/protopipe/benchmarks/operations.py @@ -209,7 +209,7 @@ def get_evt_subarray_model_output( Name of averaged model output (shower level) Returns - -------- + ------- data: `~pandas.DataFrame` Data frame """ From 60d35c4e1bf21f8b18696453f88d7790f2a70067 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Fri, 25 Mar 2022 10:20:16 +0100 Subject: [PATCH 3/3] Fix jinja 3.0.3 due to bug in jupyter nbconvert --- environment_development.yml | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/environment_development.yml b/environment_development.yml index 0e738f8c..fa905385 100644 --- a/environment_development.yml +++ b/environment_development.yml @@ -9,6 +9,7 @@ dependencies: - astropy>=4.0.1 - h5py=2 - ipython + - jinja2=3.0.3 - jupyterlab>=3.1.10 - jupyter-book - conda-forge::nbsphinx diff --git a/setup.py b/setup.py index 09e32003..9b5ee3ca 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "ctapipe==0.11.0", "pyirf==0.5.0", "pandas>=1.0.0", + "jinja2==3.0.3", ], "tests": [ # pipeline tests