diff --git a/README.md b/README.md index 8d74d627..96aedec5 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,11 @@ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) [![pypiv](https://img.shields.io/pypi/v/massdash.svg)](https://pypi.python.org/pypi/massdash) -[![continuous-integration](https://github.com/Roestlab/massdash/workflows/continuous-integration/badge.svg)](https://github.com/Roestlab/massdash/actions) [![pypidownload](https://img.shields.io/pypi/dm/massdash?color=orange)](https://pypistats.org/packages/massdash) [![biocondav](https://img.shields.io/conda/v/bioconda/massdash?label=bioconda&color=purple)](https://anaconda.org/bioconda/massdash) [![dockerv](https://img.shields.io/docker/v/singjust/massdash?label=docker&color=green)](https://hub.docker.com/r/singjust/massdash) [![dockerpull](https://img.shields.io/docker/pulls/singjust/massdash?color=green)](https://hub.docker.com/r/singjust/massdash) +[![continuous-integration](https://github.com/Roestlab/massdash/workflows/continuous-integration/badge.svg)](https://github.com/Roestlab/massdash/actions) [![readthedocs](https://img.shields.io/readthedocs/massdash)](https://massdash.readthedocs.io/en/latest/index.html) [![Licence](https://img.shields.io/badge/License-BSD_3--Clause-orange.svg)](https://raw.githubusercontent.com/RoestLab/massdash/main/LICENSE) diff --git a/massdash/constants.py b/massdash/constants.py index 7f44dcc4..5eba91ef 100644 --- a/massdash/constants.py +++ b/massdash/constants.py @@ -23,4 +23,5 @@ URL_TEST_OSW = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/openswath/osw/test.osw" URL_TEST_PQP = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/openswath/lib/test.pqp" URL_TEST_RAW_MZML = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/raw/test_raw_1.mzML" -URL_TEST_DREAMDIA_REPORT = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/dreamdia/test_dreamdia_report.tsv" \ No newline at end of file +URL_TEST_DREAMDIA_REPORT = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/dreamdia/test_dreamdia_report.tsv" +URL_PRETRAINED_CONFORMER = "https://github.com/Roestlab/massdash/releases/download/v0.0.1-alpha/base_cape.onnx" \ No newline at end of file diff --git a/massdash/loaders/SqMassLoader.py b/massdash/loaders/SqMassLoader.py index daf82cec..a2851616 100644 --- a/massdash/loaders/SqMassLoader.py +++ b/massdash/loaders/SqMassLoader.py @@ -81,7 +81,7 @@ def loadTransitionGroups(self, pep_id: str, charge: int) -> Dict[str, Transition prec_chrom_ids = t.getPrecursorChromIDs(precursor_id) precursor_chroms = t.getDataForChromatograms(prec_chrom_ids['chrom_ids'], prec_chrom_ids['native_ids']) - out[t] = TransitionGroup(precursor_chroms, transition_chroms) + out[t] = TransitionGroup(precursor_chroms, transition_chroms, pep_id, charge) return out def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFrame: diff --git a/massdash/loaders/access/MzMLDataAccess.py b/massdash/loaders/access/MzMLDataAccess.py index 330655b2..f3fcb985 100644 --- a/massdash/loaders/access/MzMLDataAccess.py +++ b/massdash/loaders/access/MzMLDataAccess.py @@ -420,7 +420,7 @@ def msExperimentToFeatureMap(self, msExperiment: po.MSExperiment, feature: Trans else: LOGGER.warn(f"No spectra found for peptide: {feature.sequence}{feature.precursor_charge}. 
Try adjusting the extraction parameters") - return FeatureMap(results_df, config) + return FeatureMap(results_df, feature.sequence, feature.precursor_charge, config) def _find_closest_reference_mz(self, given_mz: np.array, reference_mz_values: np.array, peptide_product_annotation_list: np.array) -> np.array: """ diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index e962b766..a5387d6a 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -11,8 +11,10 @@ # Structs from ..structs.TransitionGroup import TransitionGroup from ..structs.TransitionGroupFeature import TransitionGroupFeature +from ..loaders.SpectralLibraryLoader import SpectralLibraryLoader # Utils from ..util import check_package +from ..util import LOGGER onnxruntime, ONNXRUNTIME_AVAILABLE = check_package("onnxruntime") @@ -35,7 +37,7 @@ class ConformerPeakPicker: _convertConformerFeatureToTransitionGroupFeatures: Convert conformer predicted feature to TransitionGroupFeatures. """ - def __init__(self, transition_group: TransitionGroup, pretrained_model_file: str, window_size: int = 175, prediction_threshold: float = 0.5, prediction_type: str = "logits"): + def __init__(self, library_file: str, pretrained_model_file: str, prediction_threshold: float = 0.5, prediction_type: str = "logits"): """ Initialize the ConformerPeakPicker class. @@ -46,14 +48,18 @@ def __init__(self, transition_group: TransitionGroup, pretrained_model_file: str prediction_threshold (float, optional): The prediction threshold for peak picking. Defaults to 0.5. prediction_type (str, optional): The prediction type for peak picking. Defaults to "logits". """ - self.transition_group = transition_group self.pretrained_model_file = pretrained_model_file - self.window_size = window_size self.prediction_threshold = prediction_threshold self.prediction_type = prediction_type - self.onnx_session = None + self.library = SpectralLibraryLoader(library_file) self._validate_model() + + ## set in load_model + self.onnx_session = None + self.window_size = None + + LOGGER.name = __class__.__name__ def _validate_model(self): """ @@ -73,8 +79,14 @@ def load_model(self): raise ImportError("onnxruntime is required for loading the pretrained Conformer model, but not installed.") # Load pretrained model self.onnx_session = onnxruntime.InferenceSession(self.pretrained_model_file) + if len(self.onnx_session.get_inputs()) == 0: + raise ValueError("Pretrained model does not have any inputs.") + elif len(self.onnx_session.get_inputs()[0].shape) != 3: + raise ValueError("First input to model must be a 3D numpy array, current shape: {}".format(len(self.onnx_session.get_inputs()[0].shape))) + else: + self.window_size = self.onnx_session.get_inputs()[0].shape[2] - def pick(self, max_int_transition: int=1000) -> List[TransitionGroupFeature]: + def pick(self, transition_group, max_int_transition: int=1000) -> List[TransitionGroupFeature]: """ Perform peak picking. @@ -85,19 +97,19 @@ def pick(self, max_int_transition: int=1000) -> List[TransitionGroupFeature]: List[TransitionGroupFeature]: The list of transition group features. 
""" # Transform data into required input - print("Preprocessing data...") - conformer_preprocessor = ConformerPreprocessor(self.transition_group) - input_data = conformer_preprocessor.preprocess() - print("Loading model...") + LOGGER.info("Loading model...") self.load_model() - print("Predicting...") + LOGGER.info("Preprocessing data...") + conformer_preprocessor = ConformerPreprocessor(transition_group, self.window_size) + input_data = conformer_preprocessor.preprocess(self.library) + LOGGER.info("Predicting...") ort_input = {self.onnx_session.get_inputs()[0].name: input_data} ort_output = self.onnx_session.run(None, ort_input) - print("Getting predicted boundaries...") + LOGGER.info("Getting predicted boundaries...") peak_info = conformer_preprocessor.find_top_peaks(ort_output[0], ["precursor"], self.prediction_threshold, self.prediction_type) # Get actual peak boundaries - peak_info = conformer_preprocessor.get_peak_boundaries(peak_info, self.transition_group, self.window_size) - print(f"Peak info: {peak_info}") + peak_info = conformer_preprocessor.get_peak_boundaries(peak_info) + LOGGER.info(f"Peak info: {peak_info}") return self._convertConformerFeatureToTransitionGroupFeatures(peak_info, max_int_transition) def _convertConformerFeatureToTransitionGroupFeatures(self, peak_info: dict, max_int_transition: int=1000) -> List[TransitionGroupFeature]: diff --git a/massdash/preprocess/ConformerPreprocessor.py b/massdash/preprocess/ConformerPreprocessor.py index ea29f1ff..320f7e2c 100644 --- a/massdash/preprocess/ConformerPreprocessor.py +++ b/massdash/preprocess/ConformerPreprocessor.py @@ -10,6 +10,7 @@ from .GenericPreprocessor import GenericPreprocessor # Structs from ..structs.TransitionGroup import TransitionGroup +from ..loaders.SpectralLibraryLoader import SpectralLibraryLoader # Utils from ..util import check_package @@ -36,9 +37,13 @@ class ConformerPreprocessor(GenericPreprocessor): """ - def __init__(self, transition_group: TransitionGroup): + def __init__(self, transition_group: TransitionGroup, window_size: int=175): super().__init__(transition_group) + ## pad the transition group to the window size + self.transition_group = self.transition_group.adjust_length(window_size) + self.window_size = window_size + @staticmethod def min_max_scale(data, min: float=None, max: float=None) -> np.ndarray: """ @@ -101,14 +106,14 @@ def sigmoid(x: np.ndarray) -> np.ndarray: """ return 1 / (1 + np.exp(-x)) - def preprocess(self, window_size: int=175) -> np.ndarray: + def preprocess(self, library: SpectralLibraryLoader) -> np.ndarray: """ Preprocesses the data by scaling and transforming it into a numpy array. Code adapted from CAPE Args: - window_size (int): The desired window size for trimming the data. Default is 175. + SpectralLibraryLoader (SpectralLibraryLoader): The spectral library loader. Returns: np.ndarray: The preprocessed data as a numpy array with shape (1, 21, len(data[0])). 
@@ -122,17 +127,19 @@ def preprocess(self, window_size: int=175) -> np.ndarray: # Row index 19: library retention time diff # Row index 20: precursor charge - # initialize empty numpy array - data = np.empty((0, len(self.transition_group.transitionData[0].intensity)), float) + if len(self.transition_group.transitionData) != 6: + raise ValueError(f"Transition group must have 6 transitions, but has {len(self.transition_group.transitionData)}.") - lib_int_data = np.empty((0, len(self.transition_group.transitionData[0].intensity)), float) + # initialize empty numpy array + data = np.empty((0, self.window_size), float) + lib_int_data = np.empty((0, self.window_size), float) for chrom in self.transition_group.transitionData: # append ms2 intensity data to data data = np.append(data, [chrom.intensity], axis=0) - lib_int = self.transition_group.targeted_transition_list[self.transition_group.targeted_transition_list.Annotation==chrom.label]['LibraryIntensity'].values - lib_int = np.repeat(lib_int, len(chrom.intensity)) + lib_int = library.get_fragment_library_intensity(self.transition_group.sequence, self.transition_group.precursor_charge, chrom.label) + lib_int = np.repeat(lib_int, self.window_size) lib_int_data = np.append(lib_int_data, [lib_int], axis=0) # initialize empty numpy array to store scaled data @@ -148,20 +155,7 @@ def preprocess(self, window_size: int=175) -> np.ndarray: ) ## MS1 trace data - # padd precursor intensity data with zeros to match ms2 intensity data - len_trans = len(self.transition_group.transitionData[0].intensity) - len_prec = len(self.transition_group.precursorData[0].intensity) - if len_prec!=len_trans: - if len_prec < len_trans: - prec_int = np.pad(self.transition_group.precursorData[0].intensity, (0, len_trans-len_prec), 'constant', constant_values=(0, 0)) - if len_prec > len_trans: - prec_int = self.transition_group.precursorData[0].intensity - # compute number of points to trim from either side of the middle point - remove_n_points = len_prec - len_trans - # trim precursor intensity data - prec_int = prec_int[remove_n_points//2:-remove_n_points//2] - else: - prec_int = self.transition_group.precursorData[0].intensity + prec_int = self.transition_group.precursorData[0].intensity # append ms1 intensity data to data new_data[12] = self.min_max_scale(prec_int) @@ -190,18 +184,11 @@ def preprocess(self, window_size: int=175) -> np.ndarray: new_data[19] = tmp_arr ## Add charge state - new_data[20] = self.transition_group.targeted_transition_list.PrecursorCharge.values[0] * np.ones(len(data[0])) + new_data[20] = self.transition_group.precursor_charge * np.ones(len(data[0])) ## Convert to float32 new_data = new_data.astype(np.float32) - ## trim data if does not match window size starting at the centre - if len(new_data[0]) > window_size: - middle_index = len(data[0]) // 2 - trim_start = middle_index - (window_size // 2) - trim_end = middle_index + (window_size // 2) + 1 - new_data = new_data[:, trim_start:trim_end] - # cnvert the shape to be (1, 21, len(data[0])) new_data = np.expand_dims(new_data, axis=0) @@ -297,36 +284,35 @@ def find_top_peaks(self, preds, seq_classes: List[str]='input_precursor', thresh return peak_info - def get_peak_boundaries(self, peak_info: dict, tr_group: TransitionGroup, window_size: int=175): + def get_peak_boundaries(self, peak_info: dict): """ Adjusts the peak boundaries in the peak_info dictionary based on the window size and the dimensions of the input rt_array. 
Calculates the actual RT values from the rt_array and appends them to the peak_info dictionary. Args: peak_info (dict): A dictionary containing information about the peaks. - tr_group (TransitionGroup): The transition group containing the data. window_size (int, optional): The size of the window used for trimming the rt_array. Defaults to 175. Returns: dict: The updated peak_info dictionary with adjusted peak boundaries and RT values. """ - rt_array = tr_group.transitionData[0].data - if rt_array.shape[0] != window_size: - print(f"input_data {rt_array.shape[0]} was trimmed to {window_size}, adjusting peak_info indexes to map to the original datas dimensions") + rt_array = self.transition_group.transitionData[0].data + if rt_array.shape[0] != self.window_size: + print(f"input_data {rt_array.shape[0]} was trimmed to {self.window_size}, adjusting peak_info indexes to map to the original datas dimensions") for key in peak_info.keys(): for i in range(len(peak_info[key])): peak_info[key][i]['max_idx_org'] = peak_info[key][i]['max_idx'] peak_info[key][i]['start_idx_org'] = peak_info[key][i]['start_idx'] peak_info[key][i]['end_idx_org'] = peak_info[key][i]['end_idx'] - new_max_idx = peak_info[key][i]['max_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + new_max_idx = peak_info[key][i]['max_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) if not new_max_idx < 0: peak_info[key][i]['max_idx'] = new_max_idx - new_start_idx = peak_info[key][i]['start_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + new_start_idx = peak_info[key][i]['start_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) if not new_start_idx < 0: peak_info[key][i]['start_idx'] = new_start_idx - peak_info[key][i]['end_idx'] = peak_info[key][i]['end_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + peak_info[key][i]['end_idx'] = peak_info[key][i]['end_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) # get actual RT value from RT array and append to peak_info for key in peak_info.keys(): @@ -334,6 +320,6 @@ def get_peak_boundaries(self, peak_info: dict, tr_group: TransitionGroup, window peak_info[key][i]['rt_apex'] = rt_array[peak_info[key][i]['max_idx']] peak_info[key][i]['rt_start'] = rt_array[peak_info[key][i]['start_idx']] peak_info[key][i]['rt_end'] = rt_array[peak_info[key][i]['end_idx']] - peak_info[key][i]['int_apex'] = np.max([tg.intensity[peak_info[key][i]['max_idx']] for tg in tr_group.transitionData]) + peak_info[key][i]['int_apex'] = np.max([tg.intensity[peak_info[key][i]['max_idx']] for tg in self.transition_group.transitionData]) return peak_info \ No newline at end of file diff --git a/massdash/server/ExtractedIonChromatogramAnalysisServer.py b/massdash/server/ExtractedIonChromatogramAnalysisServer.py index 43949e65..e65babe1 100644 --- a/massdash/server/ExtractedIonChromatogramAnalysisServer.py +++ b/massdash/server/ExtractedIonChromatogramAnalysisServer.py @@ -163,10 +163,10 @@ def main(self): tr_group.targeted_transition_list = transition_list_ui.target_transition_list print(f"Pretrained model file: {peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") - peak_picker = ConformerPeakPicker(tr_group, peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, window_size=peak_picking_settings.peak_picker_algo_settings.conformer_window_size, prediction_threshold=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, prediction_type=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) + peak_picker = 
ConformerPeakPicker(self.massdash_gui.file_input_settings.osw_file_path, peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, prediction_threshold=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, prediction_type=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) # get the trantition in tr_group with the max intensity max_int_transition = np.max([transition.intensity for transition in tr_group.transitionData]) - peak_features = peak_picker.pick(max_int_transition) + peak_features = peak_picker.pick(tr_group, max_int_transition) tr_group_feature_data[file.filename] = peak_features st.write(f"Performing Conformer Peak Picking... Elapsed time: {elapsed_time()}") else: diff --git a/massdash/server/OneDimensionPlotterServer.py b/massdash/server/OneDimensionPlotterServer.py index 11e7570a..59e75d67 100644 --- a/massdash/server/OneDimensionPlotterServer.py +++ b/massdash/server/OneDimensionPlotterServer.py @@ -46,12 +46,13 @@ class OneDimensionPlotterServer: def __init__(self, feature_map_dict: Dict[str, FeatureMap], transition_list_ui: TransitionListUISettings, chrom_plot_settings: ChromatogramPlotUISettings, - peak_picking_settings: PeakPickingUISettings, + peak_picking_settings: PeakPickingUISettings, spectral_library_path: str=None, verbose: bool=False): self.feature_map_dict = feature_map_dict self.transition_list_ui = transition_list_ui self.chrom_plot_settings = chrom_plot_settings self.peak_picking_settings = peak_picking_settings + self.spectral_library_path = spectral_library_path self.plot_obj_dict = {} self.verbose = verbose @@ -74,7 +75,7 @@ def generate_chromatogram_plots(self): tr_group = feature_map.to_chromatograms() # Perform peak picking if enabled peak_picker = PeakPickingServer(self.peak_picking_settings, self.chrom_plot_settings) - tr_group_feature_data = peak_picker.perform_peak_picking(tr_group_data={'tmp':tr_group}, transition_list_ui=self.transition_list_ui) + tr_group_feature_data = peak_picker.perform_peak_picking(tr_group_data={'tmp':tr_group}, transition_list_ui=self.transition_list_ui, spec_lib=self.spectral_library_path) plot_settings_dict = self._get_plot_settings('Retention Time (s)', 'Intensity', file, 'chromatogram') plot_obj = self._generate_plot(tr_group, plot_settings_dict, tr_group_feature_data['tmp']) run_plots_list.append(plot_obj) diff --git a/massdash/server/PeakPickingServer.py b/massdash/server/PeakPickingServer.py index a603764b..2725b0f8 100644 --- a/massdash/server/PeakPickingServer.py +++ b/massdash/server/PeakPickingServer.py @@ -3,6 +3,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ +import numpy as np import streamlit as st from typing import Literal @@ -14,8 +15,7 @@ # Structs from ..structs.TransitionGroup import TransitionGroup # Peak Picking -from ..peakPickers.pyMRMTransitionGroupPicker import pyMRMTransitionGroupPicker -from ..peakPickers.MRMTransitionGroupPicker import MRMTransitionGroupPicker +from ..peakPickers import pyMRMTransitionGroupPicker, MRMTransitionGroupPicker, ConformerPeakPicker # Util from ..util import time_block from .util import get_string_mslevels_from_bool @@ -103,7 +103,37 @@ def perform_mrmtransitiongrouppicker_peak_picking(self, tr_group_data: Transitio st.write(f"Performing MRMTransitionGroupPicker Peak Picking... 
Elapsed time: {elapsed_time()}") return tr_group_feature_data - def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: SqMassLoader=None, transition_list_ui: Literal['ExtractedIonChromatogramAnalysisUI', 'RawTargetedExtractionAnalysisUI']=None): + def perform_conformer_peak_picking(self, spec_lib_path: str, tr_group_data: TransitionGroup): + """ + Performs peak picking using the ConformerPeakPicker algorithm. + + Args: + spec_lib_path (str): The path to the spectral library. + tr_group_data (dict): The transition group data. + + Returns: + dict: The transition group feature data. + """ + with time_block() as elapsed_time: + # Peak picking using Conformer + tr_group_feature_data = {} + + for file, tr_group in tr_group_data.items(): + #tr_group.targeted_transition_list = self.transition_list_ui.target_transition_list + st.write(f"Pretrained model file: {self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") + + peak_picker = ConformerPeakPicker(spec_lib_path, self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, + prediction_threshold=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, + prediction_type=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) + + # get the transition in tr_group with the max intensity + max_int_transition = np.max([transition.intensity for transition in tr_group.transitionData]) + peak_features = peak_picker.pick(tr_group, max_int_transition=max_int_transition) + tr_group_feature_data[file] = peak_features + st.write(f"Performing Conformer Peak Picking... Elapsed time: {elapsed_time()}") + return tr_group_feature_data + + def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: SqMassLoader=None, transition_list_ui: Literal['ExtractedIonChromatogramAnalysisUI', 'RawTargetedExtractionAnalysisUI']=None, spec_lib: str=None): """ Performs peak picking based on the selected method. Args: tr_group_data (dict, optional): The transition group data. Defaults to None. xic_data (object, optional): The XIC data. Defaults to None. transition_list_ui (object, optional): The transition list UI. Defaults to None. + spec_lib (str, optional): The path to the spectral library. Defaults to None. Required when peak picking with ConformerPeakPicker. Returns: dict: The transition group feature data. 
@@ -124,6 +155,8 @@ def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: Sq tr_group_feature_data = self.perform_pypeakpicker_mrm_peak_picking(tr_group_data) elif self.peak_picking_settings.do_peak_picking == 'MRMTransitionGroupPicker': tr_group_feature_data = self.perform_mrmtransitiongrouppicker_peak_picking(tr_group_data) + elif self.peak_picking_settings.do_peak_picking == 'Conformer': + tr_group_feature_data = self.perform_conformer_peak_picking(spec_lib, tr_group_data) else: tr_group_feature_data = {file: None for file in tr_group_data.keys()} diff --git a/massdash/server/RawTargetedExtractionAnalysisServer.py b/massdash/server/RawTargetedExtractionAnalysisServer.py index 21546301..6495f2b5 100644 --- a/massdash/server/RawTargetedExtractionAnalysisServer.py +++ b/massdash/server/RawTargetedExtractionAnalysisServer.py @@ -202,7 +202,7 @@ def main(self): # Initialize plot object dictionary plot_obj_dict = {} if chrom_plot_settings.display_plot_dimension_type == "1D": - plot_obj_dict = OneDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings, peak_picking_settings, self.massdash_gui.verbose).generate_chromatogram_plots().plot_obj_dict + plot_obj_dict = OneDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings, peak_picking_settings, self.massdash_gui.file_input_settings.transition_list_file_path, self.massdash_gui.verbose).generate_chromatogram_plots().plot_obj_dict elif chrom_plot_settings.display_plot_dimension_type == "2D": plot_obj_dict = TwoDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings).generate_two_dimensional_plots().plot_obj_dict elif chrom_plot_settings.display_plot_dimension_type == "3D": diff --git a/massdash/structs/Chromatogram.py b/massdash/structs/Chromatogram.py index 94c20262..951ceb60 100644 --- a/massdash/structs/Chromatogram.py +++ b/massdash/structs/Chromatogram.py @@ -4,7 +4,7 @@ """ import pyopenms as po -from typing import Optional, Tuple, List +from typing import Optional import pandas as pd # Structs @@ -29,4 +29,17 @@ def to_pyopenms(self, id: Optional[str] = None): return chrom def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_('rt') \ No newline at end of file + return super().toPandasDfHelper_('rt') + + def adjust_length(self, length: int) -> 'Chromatogram': + """ + Adjust the length of the chromatogram to a given length, this involved either padding or truncating the chromatogram + + Args: + length (int): The desired output length. + + Returns: + Chromatogram: A new chromatogram object with padded/truncated rt and intensity. + """ + new_data, new_intensity = super().adjust_length(length) + return Chromatogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Data1D.py b/massdash/structs/Data1D.py index 353527f8..42f4fac1 100644 --- a/massdash/structs/Data1D.py +++ b/massdash/structs/Data1D.py @@ -100,6 +100,65 @@ def median(self, boundary: Optional[Tuple[float, float]] = None) -> float: else: return np.median(self.intensity) + def adjust_length(self, length): + """ + Adjusts the length of the Data1D object. + + If the length is smaller than the current length, the data will be sliced to the given length. + If the length is larger than the current length, the data will be padded with zeros on both sides. + + E.g. if the data array is [1, 2, 3] and the desired length is 7, + the returned array will be [0, 0, 1, 2, 3, 0, 0]. + + E.g. 
if the data array is [1, 2, 3] and the desired length is 1, + the returned data array will be [1]. + + Pad the data and intensity arrays with zeros to a given length. Modifies the object in place. + + Args: + length (int): The length of the output array + + Returns: + (new_data, new_intensity) : tuple of padded/truncated data and intensity + + """ + + #### need to slice the array + if length == len(self.data): + new_data = self.data + new_intensity = self.intensity + elif length < len(self.data): + if length % 2 == 0: + slice_left = slice_right = length // 2 + else: # length % 2 == 1 + slice_left = length // 2 + 1 + slice_right = length // 2 + new_data = self.data[slice_left:-slice_right] + new_intensity = self.intensity[slice_left:-slice_right] + else: # length > len(self.data): + ### infer the chromatogram step size + step = self.data[1] - self.data[0] + + both_even_or_odd = length % 2 == len(self.data) % 2 + if both_even_or_odd: + pad_left = pad_right = (length - len(self.data)) // 2 + + new_intensity = np.copy(self.intensity) + new_intensity = np.pad(new_intensity, (pad_left, pad_right), 'constant', constant_values=0) + else: + pad_left = (length - len(self.data)) // 2 + 1 + pad_right = (length - len(self.data)) // 2 + #### length is odd, unequal paddings ##### + + #### Pad the data to left and right #### + data_right = np.linspace(self.data[-1] + step, self.data[-1] + step * pad_right, num=pad_right) + data_left = np.linspace(self.data[0] - step * pad_left, self.data[0] - step, num=pad_left) + new_data = np.concatenate((data_left, self.data, data_right)) + new_intensity = np.copy(self.intensity) + new_intensity = np.pad(new_intensity, (pad_left, pad_right), 'constant', constant_values=0) + return (new_data, new_intensity) + + @abstractmethod def toPandasDf(self) -> pd.DataFrame: pass diff --git a/massdash/structs/FeatureMap.py b/massdash/structs/FeatureMap.py index f43305f2..fdaf8d3d 100644 --- a/massdash/structs/FeatureMap.py +++ b/massdash/structs/FeatureMap.py @@ -33,9 +33,11 @@ class FeatureMap: has_im (bool): A boolean indicating if the feature map has ion mobility data ''' - def __init__(self, feature_df: pd.DataFrame, config: TargetedDIAConfig=None, verbose: bool=False): + def __init__(self, feature_df: pd.DataFrame, sequence: str, precursor_charge: int, config: TargetedDIAConfig=None, verbose: bool=False): self.feature_df = feature_df self.has_im = 'im' in feature_df.columns and feature_df['im'].notnull().all() + self.sequence = sequence + self.precursor_charge = precursor_charge if not self.has_im and not self.feature_df.empty: self.feature_df.drop(columns=['im'], inplace=True) self.config = config @@ -100,7 +102,10 @@ def to_chromatograms(self) -> TransitionGroup: Returns: TransitionGroup: A TransitionGroup object storing chromatograms ''' - return TransitionGroup(self.get_precursor_chromatograms(), self.get_transition_chromatograms()) + tg = TransitionGroup(self.get_precursor_chromatograms(), self.get_transition_chromatograms()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg def to_mobilograms(self) -> TransitionGroup: ''' @@ -109,7 +114,10 @@ def to_mobilograms(self) -> TransitionGroup: Returns: TransitionGroup: A TransitionGroup object storing mobilograms ''' - return TransitionGroup(self.get_precursor_mobilograms(), self.get_transition_mobilograms()) + tg = TransitionGroup(self.get_precursor_mobilograms(), self.get_transition_mobilograms()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg def 
to_spectra(self) -> TransitionGroup: ''' Convert the feature map to a TransitionGroup object storing spectra Returns: TransitionGroup: A TransitionGroup object storing spectra ''' - return TransitionGroup(self.get_precursor_spectra(), self.get_transition_spectra()) - + tg = TransitionGroup(self.get_precursor_spectra(), self.get_transition_spectra()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg + def get_precursor_chromatograms(self) -> List[Chromatogram]: ''' Get a list of precursor chromatograms from the feature map diff --git a/massdash/structs/Mobilogram.py b/massdash/structs/Mobilogram.py index 889bb57c..b3622c30 100644 --- a/massdash/structs/Mobilogram.py +++ b/massdash/structs/Mobilogram.py @@ -3,7 +3,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from typing import Tuple import pandas as pd # Structs @@ -17,4 +16,17 @@ def __init__(self, im, intensity, label): super().__init__(im, intensity, label) def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_(self, 'im') \ No newline at end of file + return super().toPandasDfHelper_(self, 'im') + + def adjust_length(self, length: int) -> 'Mobilogram': + """ + Adjust the length of the mobilogram to a given length; this involves either padding or truncating the mobilogram. + + Args: + length (int): The desired output length. + + Returns: + Mobilogram: A new Mobilogram object with padded/truncated driftTime and intensity. + """ + new_data, new_intensity = super().adjust_length(length) + return Mobilogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index 109e2600..88e2dd93 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -3,7 +3,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from typing import Tuple import pandas as pd # Structs @@ -17,4 +16,17 @@ def __init__(self, mz, intensity, label): super().__init__(mz, intensity, label) def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_(self, 'mz') \ No newline at end of file + return super().toPandasDfHelper_(self, 'mz') + + def adjust_length(self, length: int) -> 'Spectrum': + """ + Adjust the length of the spectrum to a given length; this involves either padding or truncating the spectrum. + + Args: + length (int): The desired output length. + + Returns: + Spectrum: A new Spectrum object with padded/truncated mz and intensity. + """ + new_data, new_intensity = super().adjust_length(length) + return Spectrum(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/TransitionGroup.py b/massdash/structs/TransitionGroup.py index 1b7e5fea..59559898 100644 --- a/massdash/structs/TransitionGroup.py +++ b/massdash/structs/TransitionGroup.py @@ -151,6 +151,34 @@ def empty(self) -> bool: """ return not any(p.empty() for p in self.precursorData) and any(t.empty() for t in self.transitionData) + def adjust_length(self, length: int) -> 'TransitionGroup': + """ + Adjusts the length of the chromatograms, mobilograms, and spectra. + + If the length is smaller than the current length, the data will be sliced to the given length. + If the length is larger than the current length, the data will be padded with zeros on both sides. + + E.g. if the data array is [1, 2, 3] and the desired length is 7, + the returned array will be [0, 0, 1, 2, 3, 0, 0]. + + E.g. if the data array is [1, 2, 3] and the desired length is 1, + the returned data array will be [1]. 
+ + Args: + length (int): The length of the output array + + Returns: + TransitionGroup: A new TransitionGroup object with padded data and intensity. + """ + new_precursorData = [] + new_transitionData = [] + for c in self.precursorData: + new_precursorData.append(c.adjust_length(length)) + for c in self.transitionData: + new_transitionData.append(c.adjust_length(length)) + + return TransitionGroup(new_precursorData, new_transitionData, self.sequence, self.precursor_charge) + def plot(self, transitionGroupFeatures: Optional[List[TransitionGroupFeature]] = None, smoothing: Optional[Literal['none', 'sgolay', 'gaussian']] = 'none', diff --git a/massdash/ui/ConformerPickerUISettings.py b/massdash/ui/ConformerPickerUISettings.py index c8ec5f99..1cb906c7 100644 --- a/massdash/ui/ConformerPickerUISettings.py +++ b/massdash/ui/ConformerPickerUISettings.py @@ -9,6 +9,7 @@ # UI from .ChromatogramPlotUISettings import ChromatogramPlotUISettings # Utils +from ..constants import URL_PRETRAINED_CONFORMER from ..util import download_file DIRNAME = os.path.dirname(__file__) @@ -39,19 +40,17 @@ def create_ui(self, plot_settings: ChromatogramPlotUISettings): plot_settings : ChromatogramPlotUISettings The plot settings for the chromatogram. """ - self.shipped_model = st.sidebar.checkbox("Use shipped model", value=True, help="Use the shipped model.") + self.shipped_model = st.sidebar.checkbox("Use shipped model", value=True, help="Use the shipped model which picks peaks across 175 points") if self.shipped_model: self.pretrained_model_file = os.path.join(DIRNAME, '..', 'assets', 'models', 'conformer', 'base_cape.onnx') # Check if the model file exists if not os.path.exists(self.pretrained_model_file): with st.spinner(f"Downloading pretrained model: {self.pretrained_model_file}..."): tmp_download_folder = os.path.join(DIRNAME, '..', 'assets', 'models', 'conformer') - url_pretrained_conformer = "https://github.com/Roestlab/massdash/releases/download/v0.0.1-alpha/base_cape.onnx" - download_file(url_pretrained_conformer, tmp_download_folder) + download_file(URL_PRETRAINED_CONFORMER, tmp_download_folder) else: self.pretrained_model_file = st.sidebar.text_input("Pretrained model file", value="", help="The pretrained model file to use.") with st.sidebar.expander("Advanced settings"): - self.conformer_window_size = st.number_input("window size", value=175, help="The window size for the conformer model, i.e the number of points of the chromatogram.") self.conformer_prediction_threshold = st.number_input("prediction score threshold", value=0.2, help="The threshold for the conformer models prediction scores to find the top peak boundary.") self.conformer_prediction_type = st.selectbox("prediction type", options=["logits", "sigmoided", "binarized"], help="The type of prediction to use for finding the top peak.") \ No newline at end of file
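
With this change, ConformerPeakPicker is constructed from a spectral-library path and a pretrained ONNX model, and the TransitionGroup is handed to `pick()` instead of the constructor; the window size is no longer a caller-supplied setting. A minimal sketch of the new call pattern, assuming hypothetical library/model paths and a `tr_group` obtained from one of the loaders:

```python
from typing import List
from massdash.peakPickers import ConformerPeakPicker
from massdash.structs.TransitionGroup import TransitionGroup
from massdash.structs.TransitionGroupFeature import TransitionGroupFeature

def pick_with_conformer(tr_group: TransitionGroup,
                        library_path: str = "lib.pqp",       # hypothetical spectral library path
                        model_path: str = "base_cape.onnx"   # hypothetical pretrained model path
                        ) -> List[TransitionGroupFeature]:
    """Run the reworked ConformerPeakPicker on a single transition group."""
    picker = ConformerPeakPicker(library_path, model_path,
                                 prediction_threshold=0.2,
                                 prediction_type="logits")
    # The transition group must carry sequence/precursor_charge (set by the loaders
    # and FeatureMap in this patch) and exactly six transitions.
    return picker.pick(tr_group, max_int_transition=1000)
```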
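
The window size is now read from the pretrained model itself: `load_model()` checks that the ONNX session has at least one input, that the first input is three-dimensional, and takes its last dimension as the window size. A short sketch of that check with onnxruntime, using a placeholder model path:

```python
import onnxruntime

model_path = "base_cape.onnx"  # placeholder; e.g. the shipped model from URL_PRETRAINED_CONFORMER
session = onnxruntime.InferenceSession(model_path)

inputs = session.get_inputs()
if len(inputs) == 0:
    raise ValueError("Pretrained model does not have any inputs.")
if len(inputs[0].shape) != 3:
    raise ValueError(f"Expected a 3D input tensor (batch, traces, window), got shape {inputs[0].shape}")

window_size = inputs[0].shape[2]  # 175 for the shipped Conformer model
print(inputs[0].name, inputs[0].shape, window_size)
```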
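
`adjust_length()` centres the existing trace inside the requested window: shorter traces are zero-padded on both sides, with the retention-time axis extended at the inferred step size, and longer traces are trimmed around the centre. The following self-contained illustration mirrors that documented behaviour; it is a sketch, not the library implementation:

```python
import numpy as np

def center_to_length(rt: np.ndarray, intensity: np.ndarray, length: int):
    """Symmetric zero-padding / centre-trimming of a trace to a target length."""
    n = len(rt)
    if length == n:
        return rt, intensity
    if length < n:
        left = (n - length) // 2
        return rt[left:left + length], intensity[left:left + length]
    pad = length - n
    left, right = pad // 2, pad - pad // 2
    step = rt[1] - rt[0]
    rt_new = np.concatenate([rt[0] - step * np.arange(left, 0, -1), rt,
                             rt[-1] + step * np.arange(1, right + 1)])
    return rt_new, np.pad(intensity, (left, right), constant_values=0.0)

rt = np.array([10.0, 11.0, 12.0])
intensity = np.array([1.0, 2.0, 3.0])
print(center_to_length(rt, intensity, 7)[1])  # [0. 0. 1. 2. 3. 0. 0.]
```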