pepkit · donaldcampbelljr · Jul 18, 2023 · Jul 10, 2023 · Jul 11, 2023 · Jul 11, 2023
diff --git a/.gitignore b/.gitignore
@@ -136,3 +136,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+/tests/data/reports/
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -2,3 +2,4 @@ include requirements/*
 include README.md
 include pipestat/schemas/*
 include pipestat/backends/*
+include pipestat/jinja_templates/*
diff --git a/docs/api_docs.md b/docs/api_docs.md
@@ -39,7 +39,7 @@ Pipestat standardizes reporting of pipeline results and pipeline status manageme
 
 
 ```python
-def __init__(self, sample_name: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f70b1358c10>, multi_pipelines: bool=False)
+def __init__(self, sample_name: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f3c2fc69360>, multi_pipelines: bool=False)
 ```
 
 Initialize the PipestatManager object
@@ -292,6 +292,12 @@ Status schema source
 
 
 
+```python
+def summarize(self, *args, **kwargs)
+```
+
+
+
 ```python
 def validate_schema(self) -> None
 ```

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,6 +1,11 @@
 # Changelog
 
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
+## [0.5.0] - 2023-XX-XX
+
+### Added
+
+- Add `summarize` function to generate a static HTML results report.
 
 ## [0.4.0] - 2023-XX-XX
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -6,7 +6,8 @@ Here you can see the command-line usage instructions for the main command and fo
 ## `pipestat --help`
 ```console
 version: 0.4.0
-usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev] {report,inspect,remove,retrieve,status,init} ...
+usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev]
+                {report,inspect,remove,retrieve,status,init} ...
 
 pipestat - report pipeline results
 
@@ -26,10 +27,12 @@ options:
   --verbosity V         Set logging level (1-5 or logging module level name)
   --logdev              Expand content of logging message format.
 
-Pipestat standardizes reporting of pipeline results and pipeline status management. It formalizes a way for pipeline 
-developers and downstream tools developers to communicate -- results produced by a pipeline can easily and
-reliably become an input for downstream analyses. A PipestatManager object exposes an API for interacting with the 
-results and pipeline status and can be backed by either a YAML-formatted file or a database.
+Pipestat standardizes reporting of pipeline results and pipeline status
+management. It formalizes a way for pipeline developers and downstream tools
+developers to communicate -- results produced by a pipeline can easily and
+reliably become an input for downstream analyses. A PipestatManager object
+exposes an API for interacting with the results and pipeline status and can be
+backed by either a YAML-formatted file or a database.
 ```
 
 ## `pipestat report --help`
@@ -55,8 +58,8 @@ options:
                                reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var
                                will be used. Currently not set
   --status-schema ST           Path to the status schema. Default will be used if not
-                               provided: /usr/local/lib/python3.10/site-
-                               packages/pipestat/schemas/status_schema.yaml
+                               provided: /usr/local/lib/python3.10/site-
+                               packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD                Path to the flag directory in case YAML file is the
                                pipestat backend.
   -i I, --result-identifier I  ID of the result to report; needs to be defined in the
@@ -69,7 +72,6 @@ options:
                                name clashes
   -t, --skip-convert           Whether skip result type conversion into the required class
                                in case it does not meet the schema requirements
-
 ```
 
 ## `pipestat inspect --help`
@@ -79,9 +81,9 @@ usage: pipestat inspect [-h] [-n N] [-f F] [-c C] [-a] [-s S] [--status-schema S
 
 Inspect a database.
 
-optional arguments:
+options:
   -h, --help              show this help message and exit
-  -n N, --project-name N     Name of the pipeline to report result for. If not provided
+  -n N, --project-name N  Name of the pipeline to report result for. If not provided
                           'PIPESTAT_PROJECT_NAME' env var will be used. Currently not set
   -f F, --results-file F  Path to the YAML file where the results will be stored. This
                           file will be used as pipestat backend and to restore the
@@ -94,7 +96,7 @@ optional arguments:
                           reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will
                           be used. Currently not set
   --status-schema ST      Path to the status schema. Default will be used if not provided:
-                          /usr/local/lib/python3.9/site-
+                          /usr/local/lib/python3.10/site-
                           packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD           Path to the flag directory in case YAML file is the pipestat
                           backend.
@@ -124,16 +126,15 @@ options:
                                reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var
                                will be used. Currently not set
   --status-schema ST           Path to the status schema. Default will be used if not
-                               provided: /usr/local/lib/python3.10/site-
-                               packages/pipestat/schemas/status_schema.yaml
+                               provided: /usr/local/lib/python3.10/site-
+                               packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD                Path to the flag directory in case YAML file is the
                                pipestat backend.
   -i I, --result-identifier I  ID of the result to report; needs to be defined in the
                                schema
   -r R, --sample-name R        ID of the record to report the result for. If not provided
                                'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not
                                set
-
 ```
 
 ## `pipestat retrieve --help`
@@ -159,16 +160,15 @@ options:
                                reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var
                                will be used. Currently not set
   --status-schema ST           Path to the status schema. Default will be used if not
-                               provided: /usr/local/lib/python3.10/site-
-                               packages/pipestat/schemas/status_schema.yaml
+                               provided: /usr/local/lib/python3.10/site-
+                               packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD                Path to the flag directory in case YAML file is the
                                pipestat backend.
   -i I, --result-identifier I  ID of the result to report; needs to be defined in the
                                schema
   -r R, --sample-name R        ID of the record to report the result for. If not provided
                                'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not
                                set
-
 ```
 
 ## `pipestat status --help`
@@ -182,7 +182,7 @@ positional arguments:
     set       Set status.
     get       Get status.
 
-optional arguments:
+options:
   -h, --help  show this help message and exit
 ```
 
@@ -208,8 +208,8 @@ options:
                           reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will
                           be used. Currently not set
   --status-schema ST      Path to the status schema. Default will be used if not provided:
-                          /usr/local/lib/python3.10
-                          /site-packages/pipestat/schemas/status_schema.yaml
+                          /usr/local/lib/python3.10/site-
+                          packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD           Path to the flag directory in case YAML file is the pipestat
                           backend.
   -r R, --sample-name R   ID of the record to report the result for. If not provided
@@ -242,9 +242,11 @@ options:
                           reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will
                           be used. Currently not set
   --status-schema ST      Path to the status schema. Default will be used if not provided:
-                          /usr/local/lib/python3.10/site-packages/pipestat/schemas/status_schema.yaml
+                          /usr/local/lib/python3.10/site-
+                          packages/pipestat/schemas/status_schema.yaml
   --flag-dir FD           Path to the flag directory in case YAML file is the pipestat
                           backend.
   -r R, --sample-name R   ID of the record to report the result for. If not provided
                           'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not set
 ```
+
diff --git a/docs_jupyter/cli.ipynb b/docs_jupyter/cli.ipynb
@@ -484,6 +484,15 @@
     "cat /usr/local/lib/python3.9/site-packages/pipestat/schemas/status_schema.yaml"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## HTML Report Generation\n",
+    "\n",
+    "To generate a static HTML report, call `pipestat summarize --results-file PIPESTAT_RESULTS_FILE --schema PIPESTAT_RESULTS_SCHEMA`."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 51,

diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb
@@ -1128,12 +1128,23 @@
     "psm_no_schema.get_status(sample_name=\"sample1\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate a static HTML report using the `summarize` function\n",
+    "\n",
+    "You can generate a static, browsable HTML report using the `summarize` function:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "psm.summarize()"
+   ]
   }
  ],
  "metadata": {

diff --git a/pipestat/argparser.py b/pipestat/argparser.py
@@ -13,13 +13,15 @@
 RETRIEVE_CMD = "retrieve"
 STATUS_CMD = "status"
 INIT_CMD = "init"
+SUMMARIZE_CMD = "summarize"
 SUBPARSER_MESSAGES = {
     REPORT_CMD: "Report a result.",
     INSPECT_CMD: "Inspect a database.",
     REMOVE_CMD: "Remove a result.",
     RETRIEVE_CMD: "Retrieve a result.",
     STATUS_CMD: "Manage pipeline status.",
     INIT_CMD: "Initialize generic config file",
+    SUMMARIZE_CMD: "Generates HTML Report",
 }
 
 STATUS_GET_CMD = "get"
@@ -255,4 +257,30 @@ def add_subparser(
         "-d", "--data", action="store_true", help="Whether to display the data"
     )
 
+    # Summarize
+    for cmd in [SUMMARIZE_CMD]:
+        sps[cmd].add_argument(
+            "-f",
+            "--results-file",
+            type=str,
+            metavar="F",
+            help=f"Path to the YAML file where the results will be stored. "
+            f"This file will be used as {PKG_NAME} backend and to restore"
+            f" the reported results across sessions",
+        )
+        sps[cmd].add_argument(
+            "-c",
+            "--config",
+            type=str,
+            metavar="C",
+            help=f"Path to the YAML configuration file. {_env_txt('config')}",
+        )
+        sps[cmd].add_argument(
+            "-s",
+            "--schema",
+            type=str,
+            metavar="S",
+            help=f"Path to the schema that defines the results that can be reported. {_env_txt('schema')}",
+        )
+
     return parser
diff --git a/pipestat/backends/abstract.py b/pipestat/backends/abstract.py
@@ -87,6 +87,13 @@ def count_records(
         _LOGGER.warning("Not implemented yet for this backend")
         pass
 
+    def get_samples(
+        self,
+        pipeline_type: Optional[str] = None,
+    ):
+        _LOGGER.warning("Not implemented yet for this backend")
+        pass
+
     def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]:
         _LOGGER.warning("Not implemented yet for this backend")
 
@@ -137,3 +144,7 @@ def remove_record(
         pipeline_type: Optional[str] = None,
     ) -> bool:
         _LOGGER.warning("Not implemented yet for this backend")
+
+    def summarize(self) -> None:
+        _LOGGER.warning("Not implemented yet for this backend")
+        pass
diff --git a/pipestat/backends/dbbackend.py b/pipestat/backends/dbbackend.py
@@ -138,6 +138,42 @@ def get_one_record(self, table_name: str, rid: Optional[str] = None):
                 if record:
                     return record
 
+    def get_samples(
+        self,
+        pipeline_type: Optional[str] = None,
+    ) -> Optional[list]:
+        """Return a list of (sample_name, pipeline_type) tuples for all reported records; if pipeline_type is None, both sample- and project-level records are included."""
+        all_samples_list = []
+
+        if pipeline_type is not None:
+            table_name = self.get_table_name(pipeline_type)
+            mod = self.get_model(table_name=table_name, strict=True)
+            with self.session as s:
+                sample_list = []
+                stmt = sql_select(mod)
+                records = s.exec(stmt).all()
+                for i in records:
+                    pair = (i.sample_name, pipeline_type)
+                    sample_list.append(pair)
+
+            return sample_list
+        else:
+            pipelines = ["sample", "project"]
+            for i in pipelines:
+                pipeline_type = i
+                table_name = self.get_table_name(pipeline_type)
+                mod = self.get_model(table_name=table_name, strict=True)
+                with self.session as s:
+                    sample_list = []
+                    stmt = sql_select(mod)
+                    records = s.exec(stmt).all()
+                    for i in records:
+                        pair = (i.sample_name, pipeline_type)
+                        sample_list.append(pair)
+
+                all_samples_list += sample_list
+            return all_samples_list
+
     def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]:
         """
         Get pipeline status

diff --git a/pipestat/backends/filebackend.py b/pipestat/backends/filebackend.py
@@ -144,6 +144,29 @@ def get_flag_file(self, sample_name: str = None) -> Union[str, List[str], None]:
             return None
         pass
 
+    def get_samples(
+        self,
+        pipeline_type: Optional[str] = None,
+    ) -> Optional[list]:
+        """Return a list of (sample_name, pipeline_type) tuples for all reported records; if pipeline_type is None, records of every pipeline type are included."""
+        all_samples_list = []
+
+        if pipeline_type is not None:
+            for k in list(self._data.data[self.pipeline_name][pipeline_type].keys()):
+                pair = (k, pipeline_type)
+                all_samples_list.append(pair)
+            return all_samples_list
+
+        else:
+            keys = self._data.data[self.pipeline_name].keys()
+        for k in keys:
+            sample_list = []
+            for i in list(self._data.data[self.pipeline_name][k].keys()):
+                pair = (i, k)
+                sample_list.append(pair)
+            all_samples_list += sample_list
+        return all_samples_list
+
     def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]:
         """
         Get the current pipeline status
@@ -442,6 +465,23 @@ def set_status(
         if prev_status:
             _LOGGER.debug(f"Changed status from '{prev_status}' to '{status_identifier}'")
 
+    def summarize(self) -> None:
+        """
+        summarize all reported results by building html report
+        """
+        _LOGGER.debug("Make HTML report here")
+        print("DEBUG SUMMARIZE")
+        self._htmlreportbuilder()
+
+    def _htmlreportbuilder(self):
+        """
+        build html report based on all reported results
+        """
+
+        # build new folder for the report
+        self.reports_dir = os.path.join(self.results_file_path, "reports")
+        _LOGGER.debug(f"Reports dir: {self.reports_dir}")
+
     def _init_results_file(self) -> None:
         """
         Initialize YAML results file if it does not exist.