From 16d5a4218fd2a858ff79b5c957c9fc03ba31f782 Mon Sep 17 00:00:00 2001 From: Philipp Hornauer Date: Fri, 11 Oct 2024 01:22:28 +0200 Subject: [PATCH] Implemented basic workflow compatible with new si version as a notebook --- .pre-commit-config.yaml | 9 + notebooks/part2_template-extraction.ipynb | 12 +- notebooks/test_sorting_si101.ipynb | 376 ++++++++++++++++++++++ 3 files changed, 396 insertions(+), 1 deletion(-) create mode 100644 notebooks/test_sorting_si101.ipynb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3530f8b..367beb9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,14 @@ # .pre-commit-config.yaml repos: +- repo: local + hooks: + - id: jupyter-nb-clear-output + name: jupyter-nb-clear-output + files: \.ipynb$ + stages: [pre-commit] + language: system + entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 # this is optional, use `pre-commit autoupdate` to get the latest rev! hooks: diff --git a/notebooks/part2_template-extraction.ipynb b/notebooks/part2_template-extraction.ipynb index c4c99c9..8f9c0c8 100755 --- a/notebooks/part2_template-extraction.ipynb +++ b/notebooks/part2_template-extraction.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "92f849ca-8cfa-47c9-a2a5-9a84c1b1ed3f", "metadata": {}, "outputs": [], @@ -137,6 +137,16 @@ "#pprint(sorting_list)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0f0e9d2", + "metadata": {}, + "outputs": [], + "source": [ + "sorting_list[:1]" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/notebooks/test_sorting_si101.ipynb b/notebooks/test_sorting_si101.ipynb new file mode 100644 index 0000000..0564018 --- /dev/null +++ b/notebooks/test_sorting_si101.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import spikeinterface.full as si\n", + "import h5py\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from glob import glob\n", + "sys.path.append(\"/home/phornauer/Git/axon_tracking/\")\n", + "from axon_tracking import spike_sorting as ss\n", + "from axon_tracking import template_extraction as te\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "te_params = dict()\n", + "te_params['align_cutout'] = True #Align waveforms by max waveform peak\n", + "te_params['upsample'] = 2 #Factor by which to upsample waveforms\n", + "te_params['rm_outliers'] = True #Check if outliers should be removed\n", + "te_params['n_jobs'] = 16 #Number of cores to use for waveform extraction\n", + "te_params['n_neighbors'] = 10 #Number of neighbors for outlier detection\n", + "te_params['peak_cutout'] = 2 #Looking for peak +- this value around the expected peak (removing minor offsets)\n", + "te_params['overwrite_wf'] = False #Flag if waveform extraction should be repeated (e.g. different cutouts)\n", + "te_params['overwrite_tmp'] = True #Flag if templates should be recalculated if already existing\n", + "\n", + "qc_params = dict()\n", + "qc_params['min_n_spikes'] = 500 #Minimum number of spikes to be detected for a unit for template extraction to take place\n", + "qc_params['exclude_mua'] = True #Exclude units that were labelled multi unit activity by kilosort" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorting_list = ['/net/bs-filesvr02/export/group/hierlemann/intermediate_data/Maxtwo/phornauer/iNeurons/240618/T002523/AxonTracking/well006']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sorting_path in tqdm(sorting_list):\n", + " output_path = os.path.join(sorting_path, \"sorter_output\")\n", + " sorting = si.KiloSortSortingExtractor(output_path)\n", + " json_path = os.path.join(sorting_path, \"spikeinterface_recording.json\")\n", + " multirecording = si.load_extractor(json_path, base_folder=True)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rec_path = ss.get_recording_path(multirecording)\n", + "stream_id = [p for p in sorting_path.split(\"/\") if p.startswith(\"well\")][\n", + " 0\n", + "] # Find out which well this belongs to\n", + "\n", + "rec_names, common_el, pos = ss.find_common_electrodes(rec_path, stream_id)\n", + "cleaned_sorting = te.select_good_units(sorting, **qc_params)\n", + "cleaned_sorting = si.remove_excess_spikes(\n", + " cleaned_sorting, multirecording\n", + ") # Relevant if last spike time == recording_length\n", + "cleaned_sorting.register_recording(multirecording)\n", + "segment_sorting = si.SplitSegmentSorting(cleaned_sorting, multirecording)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output_path = '/net/bs-filesvr02/export/group/hierlemann/intermediate_data/Maxtwo/phornauer/AxonScan/Test'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sel_unit_ids = segment_sorting.get_unit_ids()\n", + "template_save_path = os.path.join(output_path, \"templates\")\n", + "if not os.path.exists(template_save_path):\n", + " os.makedirs(template_save_path)\n", + "\n", + "full_path = ss.get_recording_path(segment_sorting)\n", + "cutout_samples, cutout_ms = te.get_assay_information(full_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "full_path = ss.get_recording_path(segment_sorting)\n", + "\n", + "h5 = h5py.File(full_path)\n", + "rec_names = list(h5[\"wells\"][stream_id].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sel_idx, rec_name in enumerate(rec_names):\n", + " wf_path = os.path.join(output_path, \"waveforms\", \"seg\" + str(sel_idx))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rec = si.MaxwellRecordingExtractor(\n", + " full_path, stream_id=stream_id, rec_name=rec_name\n", + " )\n", + "chunk_size = (\n", + " np.min([10000, rec.get_num_samples()]) - 100\n", + ") # Fallback for ultra short recordings (too little activity)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rec_centered = si.bandpass_filter(rec, freq_min=300, freq_max=4999)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seg_sort = si.SelectSegmentSorting(segment_sorting, sel_idx)\n", + "seg_sort = si.remove_excess_spikes(seg_sort, rec_centered)\n", + "seg_sort.register_recording(rec_centered)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "overwrite_wf = te_params[\"overwrite_wf\"]\n", + "cutout = cutout_ms\n", + "n_jobs = te_params[\"n_jobs\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cutout_samples, cutout_ms = te.get_assay_information(full_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "segment_sorting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "h5 = h5py.File(full_path)\n", + "rec_names = list(h5[\"wells\"][stream_id].keys())\n", + "n_units = seg_sort.get_num_units()\n", + "template_matrix = np.full([n_units, sum(cutout_samples), 26400], np.nan)\n", + "\n", + "for sel_idx, rec_name in enumerate(rec_names):\n", + " rec = si.MaxwellRecordingExtractor(\n", + " full_path, stream_id=stream_id, rec_name=rec_name\n", + " )\n", + " \n", + " rec_centered = si.bandpass_filter(rec, freq_min=300, freq_max=4999)\n", + " \n", + " seg_sort = si.SelectSegmentSorting(segment_sorting, sel_idx)\n", + " \n", + " \n", + " analyzer = si.create_sorting_analyzer(\n", + " sorting=seg_sort,\n", + " recording=rec_centered,\n", + " sparse=False,\n", + " overwrite=overwrite_wf\n", + " )\n", + "\n", + " analyzer.compute(\"random_spikes\",n_jobs=n_jobs,max_spikes_per_unit=900)\n", + " analyzer.compute(\"waveforms\",ms_before=cutout[0], ms_after=cutout[1],n_jobs=n_jobs)\n", + " analyzer.compute(\"templates\",n_jobs=n_jobs)\n", + " tmp = analyzer.get_extension(\n", + " extension_name=\"templates\"\n", + " )\n", + " tmp_data = tmp.get_data()\n", + " \n", + " els = rec.get_property(\"contact_vector\")[\"electrode\"]\n", + " template_matrix[:, :, els] = tmp_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(np.squeeze(template_matrix[23, :, :]))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grid = te.convert_to_grid(template_matrix[23,:,:], pos)\n", + "fig, ax = plt.subplots()\n", + "ax.imshow(np.max(np.abs(grid),axis=2).T,vmax=20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import h5py as h5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw = h5.File(full_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['recordings']['rec0000']['well006'].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['recordings']['rec0000']['well006']['spikes']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "34897 * 65" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['recordings']['rec0000']['well006']['groups']['routed'].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['recordings']['rec0000']['well006']['groups']['routed']['raw']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['recordings']['rec0000']['well006']['events']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['assay']['script_id'].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mxw['assay']['inputs']['electrodes'][0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "si101", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}