angelolab · ngreenwald · Jul 12, 2022 · Apr 28, 2022 · Apr 29, 2022 · May 5, 2022
diff --git a/README.md b/README.md
@@ -34,7 +34,10 @@ The [second notebook](./templates/2_create_tma_mibi_run.ipynb) is for TMAs. This
 There are a number of different computational tasks to complete once a MIBI run has finished to ensure everything went smoothly. 
 
 - 3a: real time monitoring. The [MIBI monitoring](./templates/3a_monitor_MIBI_run.ipynb) notebook will monitor an ongoing MIBI run, and begin processing the image data as soon as it is generated. This notebook is being continually updated as we move more of our processing pipeline to happen in real time as the data is generated.
-- 3b: post-run monitoring. For each step in the monitoring notebook, we have a dedicated notebook that can perform the same tasks once a run is complete. This includes [the image extraction notebook](./templates/extract_bin_file.ipynb) and the [qc metrics notebook](./templates/3b_generate_qc_metrics.ipynb). 
+- 3b - 3d: post-run monitoring. For each step in the monitoring notebook, we have a dedicated notebook that can perform the same tasks once a run is complete. 
+  - 3b: [the image extraction notebook](./templates/extract_bin_file.ipynb) will extract images from bin files that have not already been processed
+  - 3c: [qc metrics notebook](./templates/3c_generate_qc_metrics.ipynb) computes and visualizes the QC metrics for the images
+  - 3d: [median pulse heights notebook](./templates/3d_compute_median_pulse_height.ipynb) generates plots showing median pulse heights for each FOV, along with estimated run time
 
 ### 4. Processing MIBI data
 Once your run has finished, you can begin to process the data to make it ready for analysis. To remove background signal contamination, as well as compensate for channel crosstalk, you can use the [compensation](./templates/4a_compensate_image_data.ipynb) notebook. This will guide you through the Rosetta algorithm, which uses a flow-cytometry style compensation approach to remove spurious signal. 

diff --git a/templates/3a_monitor_MIBI_run.ipynb b/templates/3a_monitor_MIBI_run.ipynb
@@ -1,13 +1,20 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Monitoring an ongoing MIBI run\n",
     "\n",
     "This notebook can be run alongside an active MIBIScope run.  As images are generated, this notebook will automatically pass the data through pre-specified functions, like tiff \n",
-    "extraction, qc metric computation, etc. Eventually, all of the processing steps in the toffy repo will be incorporated into the Watcher functionality here. For now, this notebook will automatically extract tifs and generate QC plots of your data. \n",
+    "extraction, qc metric computation, etc. Eventually, all of the processing steps in the toffy repo will be incorporated into the Watcher functionality here. For now, this notebook will automatically extract tifs and generate the QC and MPH plots of your data. \n",
     "\n",
     "## This notebook is an example: create a copy before running it or you will get merge conflicts!"
    ]
@@ -91,7 +98,11 @@
     "    - `axes_size`\n",
     "    - `wrap`\n",
     "    - `dpi`\n",
-    "    - `save_dir`"
+    "    - `save_dir`\n",
+    "    \n",
+    "The `plot_mph_metrics` run callback will compute the median pulse height data for each \n",
+    "FoV, and plot the results once the run has completed. An additional argument is\n",
+    "`regression`, which when set to True will also plot the linear regression line for the data."
    ]
   },
   {
@@ -104,12 +115,15 @@
     "qc_dir = os.path.join('C:\\\\Users\\\\Customer.ION\\\\Documents\\\\run_metrics', run_name)\n",
     "\n",
     "fov_callback, run_callback = build_callbacks(\n",
-    "    run_callbacks = ['plot_qc_metrics'],\n",
+    "    run_callbacks = ['plot_qc_metrics', 'plot_mph_metrics'],\n",
     "    fov_callbacks = ['extract_tiffs'],\n",
     "    tiff_out_dir=extraction_dir,\n",
     "    qc_out_dir=qc_dir,\n",
+    "    mph_out_dir=qc_dir,\n",
+    "    plot_dir=qc_dir,\n",
     "    panel=panel,\n",
     "    intensities=False,\n",
+    "    regression=False,\n",
     ")"
    ]
   },
@@ -125,9 +139,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "toffy_env",
    "language": "python",
-   "name": "python3"
+   "name": "toffy_env"
   },
   "language_info": {
    "codemirror_mode": {
@@ -139,7 +153,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,

diff --git a/templates/3b_generate_qc_metrics.ipynb → templates/3c_generate_qc_metrics.ipynb b/templates/3b_generate_qc_metrics.ipynb → templates/3c_generate_qc_metrics.ipynb
diff --git a/templates/3d_compute_median_pulse_height.ipynb b/templates/3d_compute_median_pulse_height.ipynb
diff --git a/toffy/fov_watcher_test.py b/toffy/fov_watcher_test.py
@@ -67,10 +67,14 @@ def test_watcher(run_cbs, fov_cbs, kwargs, validators, add_blank):
 
         tiff_out_dir = os.path.join(tmpdir, 'cb_0', RUN_DIR_NAME)
         qc_out_dir = os.path.join(tmpdir, 'cb_1', RUN_DIR_NAME)
+        mph_out_dir = os.path.join(tmpdir, 'cb_2', RUN_DIR_NAME)
+        plot_dir = os.path.join(tmpdir, 'cb_3', RUN_DIR_NAME)
 
         # add directories to kwargs
         kwargs['tiff_out_dir'] = tiff_out_dir
         kwargs['qc_out_dir'] = qc_out_dir
+        kwargs['mph_out_dir'] = mph_out_dir
+        kwargs['plot_dir'] = plot_dir
 
         run_data = os.path.join(tmpdir, 'test_run')
         log_out = os.path.join(tmpdir, 'log_output')

diff --git a/toffy/mph_comp.py b/toffy/mph_comp.py
@@ -0,0 +1,188 @@
+import os
+import pandas as pd
+import numpy as np
+import json
+import matplotlib.pyplot as plt
+from natsort import natsort_keygen
+
+from mibi_bin_tools import bin_files
+from ark.utils import io_utils
+from toffy.normalize import combine_run_metrics
+
+
+def get_estimated_time(bin_file_dir, fov):
+    """Look up the estimated run time of a single FOV from its json metadata file
+
+    Args:
+        bin_file_dir (str): path to the FOV bin and json files
+        fov (str): name of fov to get estimated time for
+    Returns:
+        int: estimated run time for the given fov (frameSize squared times dwellTimeMillis)
+    Raises:
+        FileNotFoundError: no json file was listed for the supplied fov name
+        KeyError: the json file lacks frameSize or dwellTimeMillis
+    """
+
+    # path validation
+    io_utils.validate_paths(bin_file_dir)
+
+    # find the metadata file belonging to this fov
+    matches = io_utils.list_files(bin_file_dir, fov+".json")
+    if not matches:
+        raise FileNotFoundError(f"The FOV name supplied doesn't have a JSON file: {fov}")
+
+    metadata_path = os.path.join(bin_file_dir, matches[0])
+    with open(metadata_path) as metadata_file:
+        run_metadata = json.load(metadata_file)
+
+    frame_size = run_metadata.get('frameSize')
+    dwell_time = run_metadata.get('dwellTimeMillis')
+
+    # a missing key comes back as None, which makes the arithmetic below raise TypeError;
+    # that is reported to the caller as a KeyError
+    try:
+        return int(frame_size**2 * dwell_time)
+    except TypeError:
+        raise KeyError("The FOV json file is missing one of the necessary keys "
+                       "(frameSize or dwellTimeMillis)")
+
+
+def generate_time_ticks(mph_df):
+    """Create a time axis for median pulse heights with ticks at approx. 6 hour increments
+
+    Each tick is placed at the cumulative count of the row whose cumulative time is closest
+    to that tick's time, so the time axis can be overlaid on a count axis.
+
+    Args:
+        mph_df (pd.DataFrame): contains mph data, specifically requires the cum_total_count
+            and cum_total_time columns. cum_total_time is converted to hours with a factor
+            of 3600 * 1000, i.e. it is treated as milliseconds.
+    Returns:
+        list: two lists, the tick locations (cumulative count divided by one million) and
+            the tick labels (hours, in steps of 6 starting at 0)
+    """
+
+    hour_factor = 3600 * 1000
+    tick_spacing = 6
+
+    cum_time = mph_df['cum_total_time']
+    cum_count = mph_df['cum_total_count']
+
+    # determine number of ticks and what the labels should be based on total run time
+    total_time = cum_time.iloc[-1]
+    tick_num = int(total_time / (tick_spacing * hour_factor))
+    tick_labels = [i * tick_spacing for i in range(tick_num + 1)]
+
+    # find count value associated with the time closest to each tick; the lookup is
+    # positional so it does not depend on the index labels or on how pandas prints values
+    tick_locations = [0]
+    for label in tick_labels[1:]:
+        nearest_row = (cum_time - label * hour_factor).abs().to_numpy().argmin()
+        tick_locations.append(int(cum_count.iloc[nearest_row]) / 1000000)
+
+    return [tick_locations, tick_labels]
+
+
+def compute_mph_metrics(bin_file_dir, csv_dir, fov, mass=98, mass_start=97.5, mass_stop=98.5):
+    """Compute median pulse height, total count and estimated time for one FOV and save to csv
+
+    The result is written as a single-row csv named <fov>-mph_pulse.csv inside csv_dir.
+
+    Args:
+        bin_file_dir (str): path to the FOV bin and json files
+        csv_dir (str): path to output csv to
+        fov (string): name of fov bin file without the extension
+        mass (float): mass for the panel
+        mass_start (float): beginning of mass integration range
+        mass_stop (float): end of mass integration range
+    Raises:
+        FileNotFoundError: if the bin file helpers cannot find the files for the fov
+    """
+
+    # one-row panel describing the mass window; no target name is attached to it
+    target = None
+    panel_row = {
+        'Mass': mass,
+        'Target': target,
+        'Start': mass_start,
+        'Stop': mass_stop,
+    }
+    panel = pd.DataFrame([panel_row])
+
+    # name of the per-fov output file
+    out_name = fov + '-mph_pulse.csv'
+
+    # retrieve the data from the bin file
+    try:
+        mph_value = bin_files.get_median_pulse_height(bin_file_dir, fov, target, panel)
+        counts_by_fov = bin_files.get_total_counts(bin_file_dir, [fov])
+    except FileNotFoundError:
+        raise FileNotFoundError(f"The FOV name supplied doesn't have a JSON file: {fov}")
+
+    fov_count = counts_by_fov[fov]
+    fov_time = get_estimated_time(bin_file_dir, fov)
+
+    # assemble the single-row table and save it to csv_dir
+    fov_metrics = pd.DataFrame({
+        'fov': [fov],
+        'MPH': [mph_value],
+        'total_count': [fov_count],
+        'time': [fov_time]})
+    fov_metrics.to_csv(os.path.join(csv_dir, out_name), index=False)
+
+
+def combine_mph_metrics(csv_dir, return_data=False):
+    """Merge the per-FOV mph csvs into one file and add cumulative count and time columns
+
+    The combined table is sorted by fov name in natural order and written back to
+    mph_pulse_combined.csv in csv_dir.
+
+    Args:
+        csv_dir (str): path where FOV mph data csvs are stored
+        return_data (bool): whether to return dataframe with mph metrics, default False
+
+    Returns:
+        pd.DataFrame or None: combined mph data for all FOVs when return_data is True,
+            otherwise None
+    """
+
+    # path validation checks
+    io_utils.validate_paths(csv_dir)
+
+    # combine individual csv files
+    combine_run_metrics(csv_dir, 'mph_pulse')
+    combined_path = os.path.join(csv_dir, 'mph_pulse_combined.csv')
+
+    # natural sort so the running totals follow fov order
+    mph_data = pd.read_csv(combined_path).sort_values(by="fov", key=natsort_keygen())
+
+    # running totals of counts and time, keyed by the name of the new column
+    cumulative_columns = {'cum_total_count': 'total_count', 'cum_total_time': 'time'}
+    for new_col, source_col in cumulative_columns.items():
+        mph_data[new_col] = mph_data[source_col].cumsum()
+
+    mph_data.to_csv(combined_path, index=False)
+
+    # only hand the table back when asked to
+    return mph_data if return_data else None
+
+
+def visualize_mph(mph_df, out_dir, regression: bool = False):
+    """Create a scatterplot visualizing median pulse heights by FOV cumulative count
+
+    The bottom x axis shows the cumulative count in millions; a second x axis on top shows
+    the estimated time in hours, with ticks taken from generate_time_ticks. When out_dir is
+    given, the figure is saved there as fov_vs_mph.jpg, replacing any existing file.
+
+    Args:
+        mph_df (pd.DataFrame): data detailing total counts and pulse heights, requires the
+            MPH, cum_total_count and cum_total_time columns
+        out_dir (str): path of directory to save plot to, if None the plot is not saved
+        regression (bool): whether to plot regression line, default is False
+    """
+
+    # path validation checks
+    if out_dir is not None:
+        io_utils.validate_paths(out_dir)
+
+    # NOTE: this changes the matplotlib style globally, not just for this figure
+    plt.style.use('dark_background')
+
+    # visualize the median pulse heights
+    fig = plt.figure()
+    ax1 = fig.add_subplot(111)
+    x = np.array(mph_df['cum_total_count']/1000000)
+    y = np.array(mph_df['MPH'])
+    ax1.set_xlabel('FOV cumulative count (in millions)')
+    ax1.set_ylabel('median pulse height')
+    ax1.scatter(x, y)
+
+    # create time axis sharing the y axis, aligned with the count axis limits
+    ax2 = ax1.twiny()
+    ax2.set_xlabel('estimated time (hours)')
+    tick_locations, tick_labels = generate_time_ticks(mph_df)
+    ax2.set_xlim(ax1.get_xlim())
+    ax2.set_xticks(tick_locations)
+    ax2.set_xticklabels(tick_labels)
+    fig.set_size_inches(18.5, 10.5)
+
+    # plot regression line
+    if regression:
+        m, b = np.polyfit(x, y, 1)
+        ax1.plot(x, m * x + b)
+
+    # nothing to save when no output directory was supplied
+    if out_dir is None:
+        return
+
+    # save figure, removing any previous version first
+    file_path = os.path.join(out_dir, 'fov_vs_mph.jpg')
+    if os.path.exists(file_path):
+        os.remove(file_path)
+    fig.savefig(file_path)