Merge pull request #70 from haniffalab/dev

Preparation for WebAtlas manuscript
haniffalab · Apr 25, 2023 · ff16fc1 · ff16fc1
2 parents 4b33b98 + 12fbe3e
commit ff16fc1
Show file tree

Hide file tree

Showing 73 changed files with 3,797 additions and 2,797 deletions.
diff --git a/.github/workflows/tests-python.yml b/.github/workflows/tests-python.yml
@@ -11,6 +11,10 @@ jobs:
   run:
     runs-on: ubuntu-latest
     steps:
+      - name: Install libvips
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends libvips
       - name: Checkout
         uses: actions/checkout@v2
       - name: Set up Python 3.8

diff --git a/CITATION.cff b/CITATION.cff
@@ -1,8 +1,8 @@
 cff-version: 1.2.0
 type: software
 message: "If you use this repo, please cite it"
-title: "Vitessce Pipeline"
-url: "https://github.com/haniffalab/vitessce-pipeline"
+title: "WebAtlas Pipeline"
+url: "https://github.com/haniffalab/webatlas-pipeline"
 doi: 10.5281/zenodo.7405818
 authors:
   - family-names: "Li"
@@ -11,7 +11,7 @@ authors:
   - family-names: "Horsfall"
     given-names: "Dave"
     orcid: "https://orcid.org/0000-0002-8086-812X"
-  - family-names: "Basurto Lozada"
+  - family-names: "Basurto-Lozada"
     given-names: "Daniela"
     orcid: "https://orcid.org/0000-0003-3943-8424"
   - family-names: "Prete"

diff --git a/README.md b/README.md
@@ -1,10 +1,60 @@
-[![python-tests](https://github.com/haniffalab/vitessce-pipeline/actions/workflows/tests-python.yml/badge.svg)](https://github.com/haniffalab/vitessce-pipeline/actions/workflows/tests-python.yml)
-[![codecov](https://codecov.io/gh/haniffalab/vitessce-pipeline/branch/main/graph/badge.svg?token=7HQVFH08WJ)](https://codecov.io/gh/haniffalab/vitessce-pipeline/branch/main)
+[![python-tests](https://github.com/haniffalab/webatlas-pipeline/actions/workflows/tests-python.yml/badge.svg)](https://github.com/haniffalab/webatlas-pipeline/actions/workflows/tests-python.yml)
+[![codecov](https://codecov.io/gh/haniffalab/webatlas-pipeline/branch/main/graph/badge.svg?token=7HQVFH08WJ)](https://codecov.io/gh/haniffalab/webatlas-pipeline/branch/main)
 
-# Vitessce Pipeline
+# WebAtlas Pipeline
 
-[![docs](https://img.shields.io/badge/Documentation-online-blue)](https://haniffalab.github.io/vitessce-pipeline)
-[![demo](https://img.shields.io/badge/Demos-view-blue)](https://haniffalab.github.io/vitessce-pipeline/demos.html)
+[![docs](https://img.shields.io/badge/Documentation-online-blue)](https://haniffalab.github.io/webatlas-pipeline)
+[![demo](https://img.shields.io/badge/Demos-view-blue)](https://haniffalab.github.io/webatlas-pipeline/demos.html)
 [![doi](https://zenodo.org/badge/DOI/10.5281/zenodo.7405818.svg)](https://doi.org/10.5281/zenodo.7405818)
 
-This Nextflow pipeline processes spatial and single-cell experiment data for visualisation in [vitessce-app](https://github.com/haniffalab/vitessce-app). The pipeline generates data files for [supported data types](http://vitessce.io/docs/data-types-file-types/), and builds a [view config](http://vitessce.io/docs/view-config-json/).
+This Nextflow pipeline processes spatial and single-cell experiment data for visualisation in [webatlas-app](https://github.com/haniffalab/webatlas-app). The pipeline generates data files for [supported data types](http://vitessce.io/docs/data-types-file-types/), and builds a [view config](http://vitessce.io/docs/view-config-json/).
+
+
+## Usage
+
+The pipeline can handle data from `h5ad` files, image `tif` files, SpaceRanger output, Xenium output and MERSCOPE output. It can also generate image files from data files.
+
+Running the pipeline requires a parameters file that defines configuration options and the data to be processed.
+Full instructions and parameter definitions for this file are available in the [documentation](https://haniffalab.com/webatlas-pipeline/setup.html).
+
+A parameters file looks like this:
+
+```yaml
+outdir: "/path/to/output/"
+
+args:
+    h5ad:
+        compute_embeddings: "True"
+
+projects:
+  - project: project_1
+    datasets:
+      - dataset: dataset_1
+        data:
+          -
+            data_type: h5ad
+            data_path: /path/to/project_1/dataset_1/anndata.h5ad
+          -
+            data_type: raw_image
+            data_path: /path/to/project_1/dataset_1/raw_image.tif
+          -
+            data_type: label_image
+            data_path: /path/to/project_1/dataset_1/label_image.tif
+
+vitessce_options:
+    spatial:
+        xy: "obsm/spatial"
+    mappings:
+        obsm/X_umap: [0,1]
+layout: "simple"
+```
+
+
+The pipeline can then be run as follows:
+
+```sh
+nextflow run main.nf -params-file /path/to/run-params.yaml -entry Full_pipeline
+```
+
+
+Parameters file templates are available in the `templates` directory.
diff --git a/bin/build_config.py b/bin/build_config.py
@@ -42,7 +42,7 @@ def build_options(
         file_type (str): Type of file supported by Vitessce.
         file_path (str): Path to file.
         file_options (dict[str, T.Any]): Dictionary defining the options.
-        check_exist (bool, optional): Whether to check the given path to confirm the file exists. 
+        check_exist (bool, optional): Whether to check the given path to confirm the file exists.
             Defaults to False.
 
     Returns:
@@ -120,89 +120,95 @@ def build_options(
 
 
 def build_raster_options(
-    image_zarr: dict[str, dict[str, str]], url: str
+    images: dict[str, list[dict[str, T.Any]]], url: str
 ) -> dict[str, T.Any]:
     """Function that creates the View config's options for image files
 
     Args:
-        image_zarr (dict[str, dict[str, str]]): Dictionary containing a metadata dictionary
-            for each image in Zarr format. 
-        url (str): URL to prepend to each file in the config file. 
+        images (dict[str, list[dict[str, T.Any]]]): Dictionary containing, for each image type key (raw and label),
+            a list of dictionaries (one per image of that type), each with the image's path (`path`)
+            and its metadata (`md`).
+        url (str): URL to prepend to each file in the config file.
             The URL to the local or remote server that will serve the files
 
     Returns:
         dict[str, T.Any]: Options dictionary for View config file
     """
     raster_options = {"renderLayers": [], "schemaVersion": "0.0.2", "images": []}
-    for image in image_zarr.keys():
-        image_name = os.path.splitext(image)[0]
-        channel_names = (
-            image_zarr[image]["channel_names"]
-            if "channel_names" in image_zarr[image]
-            else []
-        )
-        channel_names, isBitmask = (
-            (["Labels"], True)
-            if image_name.split("_")[-1] == "label" and not len(channel_names)
-            else (channel_names, False)
-        )
-        raster_options["renderLayers"].append(image_name)
-        raster_options["images"].append(
-            {
-                "name": image_name,
-                "url": os.path.join(url, image),
-                "type": "zarr",
-                "metadata": {
-                    "isBitmask": isBitmask,
-                    "dimensions": [
-                        {"field": "t", "type": "quantitative", "values": None},
-                        {
-                            "field": "channel",
-                            "type": "nominal",
-                            "values": channel_names,
-                        },
-                        {"field": "y", "type": "quantitative", "values": None},
-                        {"field": "x", "type": "quantitative", "values": None},
-                    ],
-                    "isPyramid": True,
-                    "transform": {"translate": {"y": 0, "x": 0}, "scale": 1},
-                },
-            }
-        )
+    for img_type in images.keys():  # raw, label
+        for img in images[img_type]:
+            image_name = os.path.splitext(os.path.basename(img["path"]))[0]
+            channel_names = (
+                img["md"]["channel_names"]
+                if "channel_names" in img["md"] and len(img["md"]["channel_names"])
+                else (
+                    ["Labels"]
+                    if img_type == "label"
+                    else [f"Channel {x}" for x in range(int(img["md"]["C"]))]
+                )
+            )
+            isBitmask = img_type == "label"
+            raster_options["renderLayers"].append(image_name)
+            raster_options["images"].append(
+                {
+                    "name": image_name,
+                    "url": os.path.join(url, os.path.basename(img["path"])),
+                    "type": "zarr",
+                    "metadata": {
+                        "isBitmask": isBitmask,
+                        "dimensions": [
+                            {"field": "t", "type": "quantitative", "values": None},
+                            {
+                                "field": "channel",
+                                "type": "nominal",
+                                "values": channel_names,
+                            },
+                            {"field": "y", "type": "quantitative", "values": None},
+                            {"field": "x", "type": "quantitative", "values": None},
+                        ],
+                        "isPyramid": True,
+                        "transform": {"translate": {"y": 0, "x": 0}, "scale": 1},
+                    },
+                }
+            )
     return raster_options
 
 
 def write_json(
-    title: str = "",
+    project: str = "",
     dataset: str = "",
     file_paths: list[str] = [],
-    image_zarr: dict[str, dict[str, str]] = {},
+    images: dict[str, list[dict[str, T.Any]]] = {},
     url: str = "",
-    outdir: str = "./",
-    config_filename_suffix: str = "config.json",
     options: dict[str, T.Any] = None,
     layout: str = "minimal",
     custom_layout: str = None,
+    title: str = "",
+    description: str = "",
+    config_filename_suffix: str = "config.json",
+    outdir: str = "./",
 ) -> None:
     """This function writes a Vitessce View config JSON file
 
     Args:
-        title (str, optional): Title to use in the config file. Defaults to "".
+        project (str, optional): Project name. Defaults to "".
         dataset (str, optional): Dataset name. Defaults to "".
         file_paths (list[str], optional): Paths to files that will be included in the config file. Defaults to [].
-        image_zarr (dict[str, dict[str, str]], optional): Dictionary containing a metadata dictionary
-            for each image in Zarr format. Defaults to {}.
-        url (str, optional): URL to prepend to each file in the config file. 
+        images (dict[str, list[dict[str, T.Any]]], optional): Dictionary containing for each image type key (raw and label)
+            a list of dictionaries (one per image of that type) with the corresponding path and metadata for that image.
+            Defaults to {}.
+        url (str, optional): URL to prepend to each file in the config file.
             The URL to the local or remote server that will serve the files.
             Defaults to "".
-        outdir (str, optional): Directory in which the config file will be written to. Defaults to "./".
-        config_filename_suffix (str, optional): Config filename suffix. Defaults to "config.json".
         options (dict[str, T.Any], optional): Dictionary with Vitessce config file `options`. Defaults to None.
         layout (str, optional): Type of predefined layout to use. Defaults to "minimal".
         custom_layout (str, optional): String defining a Vitessce layout following its alternative syntax.
             https://vitessce.github.io/vitessce-python/api_config.html#vitessce.config.VitessceConfig.layout
             https://github.com/vitessce/vitessce-python/blob/1e100e4f3f6b2389a899552dffe90716ffafc6d5/vitessce/config.py#L855
             Defaults to None.
+        title (str, optional): Data title to show in the visualization. Defaults to "".
+        config_filename_suffix (str, optional): Config filename suffix. Defaults to "config.json".
+        outdir (str, optional): Directory in which the config file will be written to. Defaults to "./".
 
     Raises:
         SystemExit: If no valid files have been input
@@ -211,17 +217,20 @@ def write_json(
 
     has_files = False
 
-    config = VitessceConfig(name=str(title))
-    config_dataset = config.add_dataset(str(title), str(dataset))
+    config = VitessceConfig(
+        name=str(title) if len(title) else str(project),
+        description=description,
+    )
+    config_dataset = config.add_dataset(str(dataset), str(dataset))
 
     coordination_types = defaultdict(lambda: cycle(iter([])))
-    file_paths_names = {x.split("_")[-1]: x for x in file_paths}
+    file_paths_names = {x.split("-")[-1]: x for x in file_paths}
     dts = set([])
 
-    if len(image_zarr.items()):
+    if images.keys() and any([len(images[k]) for k in images.keys()]):
         has_files = True
         config_dataset.add_file(
-            dt.RASTER, ft.RASTER_JSON, options=build_raster_options(image_zarr, url)
+            dt.RASTER, ft.RASTER_JSON, options=build_raster_options(images, url)
         )
         dts.add(dt.RASTER)
 
@@ -346,7 +355,7 @@ def write_json(
     if outdir and not os.path.isdir(outdir):
         os.mkdir(outdir)
     with open(
-        os.path.join(outdir or "", f"{title}_{dataset}_{config_filename_suffix}"), "w"
+        os.path.join(outdir or "", f"{project}-{dataset}-{config_filename_suffix}"), "w"
     ) as out_file:
         json.dump(config_json, out_file, indent=2)
 

diff --git a/bin/consolidate_md.py b/bin/consolidate_md.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 
 
-def main(file_in: str) -> None:
+def consolidate(file_in: str) -> None:
     """Function to consolidate the metadata of a Zarr file
 
     Args:
@@ -26,4 +26,4 @@ def main(file_in: str) -> None:
 
 
 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(consolidate)
diff --git a/bin/generate_image.py b/bin/generate_image.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""
+generate_image.py
+====================================
+Generates raw/label images from spatial data
+"""
+
+from __future__ import annotations
+import fire
+import typing as T
+import tifffile as tf
+from process_spaceranger import visium_label
+from process_xenium import xenium_label
+from process_merscope import merscope_label, merscope_raw
+
+
+def create_img(
+    stem: str,
+    img_type: str,
+    file_type: str,
+    file_path: str,
+    ref_img: str = None,
+    args: dict[str, T.Any] = {},
+) -> None:
+    """This function calls the corresponding function
+    to write a raw or label image given the metadata provided.
+    It also obtains the image shape of a reference image if specified.
+
+    Args:
+        stem (str): Prefix for the output image filename.
+        img_type (str): Type of image to generate, either "label" or "raw".
+        file_type (str): Type of file containing the metadata from which to generate the image.
+        file_path (str): Path to the metadata file.
+        ref_img (str, optional): Path to reference image from which to get the
+            shape for the label image. Defaults to None.
+        args (dict[str,T.Any], optional): Args to be passed to the appropriate processing function.
+            Defaults to {}.
+    """
+
+    if ref_img:
+        tif_img = tf.TiffFile(ref_img)
+        args["shape"] = tif_img.pages[0].shape[:2]
+
+    if img_type == "label":
+        if file_type == "visium":
+            visium_label(stem, file_path, **args)
+        elif file_type == "merscope":
+            merscope_label(stem, file_path, **args)
+        elif file_type == "xenium":
+            xenium_label(stem, file_path, **args)
+    elif img_type == "raw":
+        if file_type == "merscope":
+            merscope_raw(stem, file_path, **args)
+
+
+if __name__ == "__main__":
+    fire.Fire(create_img)