From 16d5a4218fd2a858ff79b5c957c9fc03ba31f782 Mon Sep 17 00:00:00 2001
From: Philipp Hornauer <philipp.hornauer@bsse.ethz.ch>
Date: Fri, 11 Oct 2024 01:22:28 +0200
Subject: [PATCH] Implemented basic workflow compatible with the new
 SpikeInterface (si) version as a notebook

---
 .pre-commit-config.yaml                   |   9 +
 notebooks/part2_template-extraction.ipynb |  12 +-
 notebooks/test_sorting_si101.ipynb        | 376 ++++++++++++++++++++++
 3 files changed, 396 insertions(+), 1 deletion(-)
 create mode 100644 notebooks/test_sorting_si101.ipynb

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3530f8b..367beb9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,14 @@
 # .pre-commit-config.yaml
 repos:
+- repo: local
+  hooks:
+    - id: jupyter-nb-clear-output
+      name: jupyter-nb-clear-output
+      files: \.ipynb$
+      stages: [pre-commit]
+      language: system
+      entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace
+
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.1.0  # this is optional, use `pre-commit autoupdate` to get the latest rev!
     hooks:
diff --git a/notebooks/part2_template-extraction.ipynb b/notebooks/part2_template-extraction.ipynb
index c4c99c9..8f9c0c8 100755
--- a/notebooks/part2_template-extraction.ipynb
+++ b/notebooks/part2_template-extraction.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "92f849ca-8cfa-47c9-a2a5-9a84c1b1ed3f",
    "metadata": {},
    "outputs": [],
@@ -137,6 +137,16 @@
     "#pprint(sorting_list)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0f0e9d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorting_list[:1]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/notebooks/test_sorting_si101.ipynb b/notebooks/test_sorting_si101.ipynb
new file mode 100644
index 0000000..0564018
--- /dev/null
+++ b/notebooks/test_sorting_si101.ipynb
@@ -0,0 +1,376 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, sys\n",
+    "import spikeinterface.full as si\n",
+    "import h5py\n",
+    "import numpy as np\n",
+    "from tqdm import tqdm\n",
+    "from glob import glob\n",
+    "sys.path.append(\"/home/phornauer/Git/axon_tracking/\")\n",
+    "from axon_tracking import spike_sorting as ss\n",
+    "from axon_tracking import template_extraction as te\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "te_params = dict()\n",
+    "te_params['align_cutout'] = True #Align waveforms by max waveform peak\n",
+    "te_params['upsample'] = 2 #Factor by which to upsample waveforms\n",
+    "te_params['rm_outliers'] = True #Flag if outlier waveforms should be removed\n",
+    "te_params['n_jobs'] = 16 #Number of cores to use for waveform extraction\n",
+    "te_params['n_neighbors'] = 10 #Number of neighbors for outlier detection\n",
+    "te_params['peak_cutout'] = 2 #Looking for peak +- this value around the expected peak (removing minor offsets)\n",
+    "te_params['overwrite_wf'] = False #Flag if waveform extraction should be repeated (e.g. different cutouts)\n",
+    "te_params['overwrite_tmp'] = True #Flag if templates should be recalculated if already existing\n",
+    "\n",
+    "qc_params = dict()\n",
+    "qc_params['min_n_spikes'] = 500 #Minimum number of spikes a unit must have for template extraction to take place\n",
+    "qc_params['exclude_mua'] = True #Exclude units that were labelled as multi-unit activity (MUA) by kilosort"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorting_list = ['/net/bs-filesvr02/export/group/hierlemann/intermediate_data/Maxtwo/phornauer/iNeurons/240618/T002523/AxonTracking/well006']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sorting_path in tqdm(sorting_list):\n",
+    "    output_path = os.path.join(sorting_path, \"sorter_output\")\n",
+    "    sorting = si.KiloSortSortingExtractor(output_path)\n",
+    "    json_path = os.path.join(sorting_path, \"spikeinterface_recording.json\")\n",
+    "    multirecording = si.load_extractor(json_path, base_folder=True)\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec_path = ss.get_recording_path(multirecording)\n",
+    "stream_id = [p for p in sorting_path.split(\"/\") if p.startswith(\"well\")][\n",
+    "    0\n",
+    "]  # Find out which well this belongs to\n",
+    "\n",
+    "rec_names, common_el, pos = ss.find_common_electrodes(rec_path, stream_id)\n",
+    "cleaned_sorting = te.select_good_units(sorting, **qc_params)\n",
+    "cleaned_sorting = si.remove_excess_spikes(\n",
+    "    cleaned_sorting, multirecording\n",
+    ")  # Relevant if last spike time == recording_length\n",
+    "cleaned_sorting.register_recording(multirecording)\n",
+    "segment_sorting = si.SplitSegmentSorting(cleaned_sorting, multirecording)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_path = '/net/bs-filesvr02/export/group/hierlemann/intermediate_data/Maxtwo/phornauer/AxonScan/Test'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sel_unit_ids = segment_sorting.get_unit_ids()\n",
+    "template_save_path = os.path.join(output_path, \"templates\")\n",
+    "if not os.path.exists(template_save_path):\n",
+    "    os.makedirs(template_save_path)\n",
+    "\n",
+    "full_path = ss.get_recording_path(segment_sorting)\n",
+    "cutout_samples, cutout_ms = te.get_assay_information(full_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "full_path = ss.get_recording_path(segment_sorting)\n",
+    "\n",
+    "h5 = h5py.File(full_path)\n",
+    "rec_names = list(h5[\"wells\"][stream_id].keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sel_idx, rec_name in enumerate(rec_names):\n",
+    "    wf_path = os.path.join(output_path, \"waveforms\", \"seg\" + str(sel_idx))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec = si.MaxwellRecordingExtractor(\n",
+    "        full_path, stream_id=stream_id, rec_name=rec_name\n",
+    "    )\n",
+    "chunk_size = (\n",
+    "    np.min([10000, rec.get_num_samples()]) - 100\n",
+    ")  # Fallback for ultra short recordings (too little activity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec_centered = si.bandpass_filter(rec, freq_min=300, freq_max=4999)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seg_sort = si.SelectSegmentSorting(segment_sorting, sel_idx)\n",
+    "seg_sort = si.remove_excess_spikes(seg_sort, rec_centered)\n",
+    "seg_sort.register_recording(rec_centered)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overwrite_wf = te_params[\"overwrite_wf\"]\n",
+    "cutout = cutout_ms\n",
+    "n_jobs = te_params[\"n_jobs\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cutout_samples, cutout_ms = te.get_assay_information(full_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "segment_sorting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "h5 = h5py.File(full_path)\n",
+    "rec_names = list(h5[\"wells\"][stream_id].keys())\n",
+    "n_units = seg_sort.get_num_units()\n",
+    "template_matrix = np.full([n_units, sum(cutout_samples), 26400], np.nan)\n",
+    "\n",
+    "for sel_idx, rec_name in enumerate(rec_names):\n",
+    "    rec = si.MaxwellRecordingExtractor(\n",
+    "            full_path, stream_id=stream_id, rec_name=rec_name\n",
+    "        )\n",
+    "    \n",
+    "    rec_centered = si.bandpass_filter(rec, freq_min=300, freq_max=4999)\n",
+    "    \n",
+    "    seg_sort = si.SelectSegmentSorting(segment_sorting, sel_idx)\n",
+    "    \n",
+    "    \n",
+    "    analyzer = si.create_sorting_analyzer(\n",
+    "        sorting=seg_sort,\n",
+    "        recording=rec_centered,\n",
+    "        sparse=False,\n",
+    "        overwrite=overwrite_wf\n",
+    "        )\n",
+    "\n",
+    "    analyzer.compute(\"random_spikes\",n_jobs=n_jobs,max_spikes_per_unit=900)\n",
+    "    analyzer.compute(\"waveforms\",ms_before=cutout[0], ms_after=cutout[1],n_jobs=n_jobs)\n",
+    "    analyzer.compute(\"templates\",n_jobs=n_jobs)\n",
+    "    tmp = analyzer.get_extension(\n",
+    "        extension_name=\"templates\"\n",
+    "    )\n",
+    "    tmp_data = tmp.get_data()\n",
+    "    \n",
+    "    els = rec.get_property(\"contact_vector\")[\"electrode\"]\n",
+    "    template_matrix[:, :, els] = tmp_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(np.squeeze(template_matrix[23, :, :]))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid = te.convert_to_grid(template_matrix[23,:,:], pos)\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.imshow(np.max(np.abs(grid),axis=2).T,vmax=20)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import h5py as h5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw = h5.File(full_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['recordings']['rec0000']['well006'].keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['recordings']['rec0000']['well006']['spikes']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "34897 * 65"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['recordings']['rec0000']['well006']['groups']['routed'].keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['recordings']['rec0000']['well006']['groups']['routed']['raw']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['recordings']['rec0000']['well006']['events']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['assay']['script_id'].keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mxw['assay']['inputs']['electrodes'][0]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "si101",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}