Merge pull request #32 from jalew188/peptdeep_latest

Peptdeep latest
MannLabs · Oct 28, 2022 · a196076
2 parents cb10e38 + 85ed0dc
commit a196076
Show file tree

Hide file tree

Showing 11 changed files with 805 additions and 279 deletions.
diff --git a/alphaviz/contrib/ms2_plot.py b/alphaviz/contrib/ms2_plot.py
@@ -116,9 +116,8 @@ def plot(self, plot_df, sequence, title,
     def _init_plot(self, title):
 
         self.fig = plotly.subplots.make_subplots(
-            rows=(
-                self.rows
-            ), cols=3, 
+            rows=self.rows,
+            cols=3, 
             shared_xaxes=True,
             specs=self.specs,
             vertical_spacing=self.vertical_spacing,
@@ -285,6 +284,7 @@ def _plot_frag_annotations(self, plot_df):
         for mz, inten, ion in df[
             ['mz_values','intensity_values','ions']
         ].values:
+            ion = ion.lower().replace('modloss','modnl')
             self.fig.add_annotation(
                 x=mz, y=inten+yshift,
                 text=ion,
@@ -294,18 +294,18 @@ def _plot_frag_annotations(self, plot_df):
                 col=self.col,
             )
 
-        neg_ay = max_inten*0.3
         pred_df = plot_df.query('intensity_values<0')
         pred_df = pred_df[~pred_df.ions.isin(set(df.ions))]
         for mz, inten, ion in pred_df[
             ['mz_values','intensity_values','ions']
         ].values:
+            ion = ion.lower().replace('modloss','modnl')
             self.fig.add_annotation(
                 x=mz, y=inten-yshift,
                 text=ion,
                 textangle=-90,
                 font_size=10,
-                ay=inten-yshift-neg_ay,
+                ay=inten-yshift-max_inten*(0.28+len(ion)/60),
                 ayref=f'y{self.row}',
                 yref=f'y{self.row}',
                 row=self.row,
@@ -362,7 +362,7 @@ def plot(self,
         d = (
             plot_df.mz_values.max() - 
             plot_df.mz_values.min()
-        ) * 2/8
+        ) * 2/len(sequence)
         aa_x_positions = np.linspace(
             plot_df.mz_values.min()+d, 
             plot_df.mz_values.max()-d, 

diff --git a/alphaviz/contrib/ms_viz.py b/alphaviz/contrib/ms_viz.py
@@ -8,23 +8,34 @@
 
 from peptdeep.pretrained_models import ModelManager
 
+from alpharaw.match.psm_match_alphatims import PepSpecMatch_AlphaTims
+
 from .ms2_plot import MS2_Plot
 from .xic_plot import XIC_1D_Plot
-from .reader_utils import load_ms_data, load_psms
+from .reader_utils import load_psms
 
 from .peptdeep_utils import (
     match_ms2, get_frag_df_from_peptide_info,
     predict_one_peptide, get_peptide_info_from_dfs,
 )
 
 class MS_Viz:
-    min_frag_mz:float = 200.0
+    _min_frag_mz:float = 200.0
+    _labeled_sites = ['K','N-term']
+    remove_unlabeled_fragments = False
+    ms2_ppm_tol=20.0
+    ms1_ppm_tol=20.0
+    rt_sec_tol_to_slice_spectrum = 3.0
+    im_tol_to_slice_spectrum = 0.05
+    find_closest_ms2_by_rt_sec = True
     def __init__(self, 
         model_mgr:ModelManager,
         frag_types:list = ['b','y','b-modloss','y-modloss'],
     ):
         self.model_mgr = model_mgr
-        self.ms_data = None
+        self.tims_data = None
+        self.pep_spec_match = None
+
         self.psm_df = pd.DataFrame()
         self.fragment_mz_df = pd.DataFrame()
         self.fragment_intensity_df = pd.DataFrame()
@@ -35,15 +46,36 @@ def __init__(self,
             self._frag_types, self._max_frag_charge
         )
 
+        self.prediction_mode=False
+
         self.ms2_plot = MS2_Plot()
         self.xic_1d_plot = XIC_1D_Plot()
 
-    def load_ms_data(self, ms_file, dda:bool):
-        self.ms_data = load_ms_data(ms_file, dda=dda)
+    @property
+    def min_frag_mz(self):
+        return self._min_frag_mz
+
+    @min_frag_mz.setter
+    def min_frag_mz(self, val):
+        self._min_frag_mz = val
+        self.xic_1d_plot.min_frag_mz = val
+
+    def load_ms_data(self, ms_file, ms_file_type, dda:bool):
+        self.tims_match = PepSpecMatch_AlphaTims(
+            self.charged_frag_types, centroid_mode=True,
+        )
+        self.tims_match.load_ms_data(ms_file, ms_file_type, dda)
+        self.tims_data = self.tims_match.tims_data
+
+    def add_rt_im_columns_to_psm_df_if_missing(self):
+        self.psm_df = self.tims_match._add_missing_columns_to_psm_df(
+            self.psm_df
+        )
 
     def load_psms(self, 
-        psm_file, psm_type,
-        get_fragments=False,
+        psm_file:str, psm_type:str,
+        get_fragments:bool=False,
+        add_modification_mapping:dict=None,
     ):
         (
             self.psm_df, self.fragment_mz_df, 
@@ -54,24 +86,27 @@ def load_psms(self,
             model_mgr=self.model_mgr,
             frag_types=self._frag_types,
             max_frag_charge=self._max_frag_charge,
+            add_modification_mapping=add_modification_mapping,
         )
 
     def predict_one_peptide_info(self,
         one_pept_df:pd.DataFrame
-    )->dict:
+    )->pd.DataFrame:
         return predict_one_peptide(
             self.model_mgr, one_pept_df, 
-            self.ms_data.rt_max_value
+            self.tims_data.rt_max_value,
+            self.prediction_mode,
+            self._labeled_sites if self.remove_unlabeled_fragments else None
         )
 
     def extract_one_peptide_info(self,
         one_pept_df:pd.DataFrame,
-    )->dict:
+    )->pd.DataFrame:
         return get_peptide_info_from_dfs(
             one_pept_df,
             self.fragment_mz_df, 
             self.fragment_intensity_df,
-            self.ms_data.rt_max_value,
+            self.tims_data.rt_max_value,
         )
 
     def transfer_learn(self):
@@ -87,31 +122,23 @@ def transfer_learn(self):
             )
 
     def plot_elution_profile_heatmap(self,
-        peptide_info: dict,
-        mz_tol: float = 50,
-        rt_tol: float = 30,
-        im_tol: float = 0.05,
+        peptide_info: pd.DataFrame,
     ):
         raise NotImplementedError('TODO for timsTOF data')
 
     def plot_elution_profile(self,
-        peptide_info: dict,
-        mz_tol: float = 50,
-        rt_tol: float = 30,
-        im_tol: float = 0.05,
+        peptide_info: pd.DataFrame,
         include_precursor:bool=True,
+        include_ms1:bool=True,
     )->go.Figure:
         """Based on `alphaviz.plotting.plot_elution_profile`
 
         Parameters
         ----------
-        peptide_info : dict
-            alphaviz peptide_info dict, 
+        peptide_info : pd.DataFrame
+            alphaviz peptide_info, 
             see `self.predict_one_peptide`.
 
-        mz_tol : float, optional
-            in ppm, by default 50
-
         rt_tol : float, optional
             RT tol in seconds, by default 30
 
@@ -127,13 +154,13 @@ def plot_elution_profile(self,
             plotly Figure object return by 
             `alphaviz.plotting.plot_elution_profile`
         """
+        self.xic_1d_plot.ms1_ppm_tol = self.ms1_ppm_tol
+        self.xic_1d_plot.ms2_ppm_tol = self.ms2_ppm_tol
         return self.xic_1d_plot.plot(
-            self.ms_data,
+            self.tims_data,
             peptide_info=peptide_info,
-            mz_tol=mz_tol,
-            rt_tol=rt_tol,
-            im_tol=im_tol,
-            include_precursor=include_precursor
+            include_precursor=include_precursor,
+            include_ms1=include_ms1,
         )
 
     def _add_unmatched_df(self, plot_df, spec_df):
@@ -142,11 +169,10 @@ def _add_unmatched_df(self, plot_df, spec_df):
         return pd.concat([spec_df, plot_df], ignore_index=True)
 
     def plot_mirror_ms2(self, 
-        peptide_info:dict,
+        peptide_info:pd.DataFrame,
         frag_df:pd.DataFrame=None, 
         spec_df:pd.DataFrame=None, 
         title:str="", 
-        mz_tol:float=50,
         matching_mode:str="centroid",
         plot_unmatched_peaks:bool=False,
     )->go.Figure:
@@ -156,8 +182,8 @@ def plot_mirror_ms2(self,
         Parameters
         ----------
 
-        peptide_info : dict
-            peptide_info dict in alphaviz format
+        peptide_info : pd.DataFrame
+            peptide_info in alphaviz format
 
         frag_df : pd.DataFrame, optional
             Fragment DF
@@ -166,9 +192,6 @@ def plot_mirror_ms2(self,
             AlphaTims sliced DataFrame for raw data,
             by default None
 
-        mz_tol : float, optional
-            in ppm, by default 50
-
         matching_mode : str, optional
             peak matching mode, by default "centroid"
         
@@ -187,45 +210,65 @@ def plot_mirror_ms2(self,
 
         frag_df = frag_df[
             frag_df.mz_values>=max(
-                spec_df.mz_values.min()-0.1, self.min_frag_mz
+                spec_df.mz_values.min()-0.1, self._min_frag_mz
             )
         ]
-
+        spec_df['intensity_values'] = spec_df.intensity_values.astype(float)
         plot_df, pcc, spc = match_ms2(
             spec_df=spec_df, frag_df=frag_df,
-            mz_tol=mz_tol, 
+            mz_tol=self.ms2_ppm_tol, 
             matching_mode=matching_mode,
         )
 
+        peptide_info['pcc'] = pcc
+        peptide_info['spc'] = spc
+
         if plot_unmatched_peaks:
             plot_df = self._add_unmatched_df(
                 plot_df, spec_df
             )
 
         if not title:
-            title = f"{peptide_info['mod_seq_charge']} PCC={pcc:.3f}"
+            title = f"{peptide_info['mod_seq_charge'].values[0]} PCC={pcc:.3f}"
 
         plot_df = plot_df.query('intensity_values!=0')
 
         return self.ms2_plot.plot(
             plot_df, 
             title=title,
-            sequence=peptide_info['sequence'],
+            sequence=peptide_info['sequence'].values[0],
             plot_unmatched_peaks=plot_unmatched_peaks,
         )
 
     def get_ms2_spec_df(self, peptide_info)->pd.DataFrame:
         im_slice = (
-            slice(None) if peptide_info['im'] == 0 else 
-            slice(peptide_info['im']-0.05,peptide_info['im']+0.05)
+            slice(None) if peptide_info['im'].values[0] == 0 else 
+            slice(
+                peptide_info['im'].values[0]-self.im_tol_to_slice_spectrum,
+                peptide_info['im'].values[0]+self.im_tol_to_slice_spectrum
+            )
+        )
+        query_rt = peptide_info['rt_sec'].values[0]
+        rt_slice = slice(
+            query_rt-self.rt_sec_tol_to_slice_spectrum,
+            query_rt+self.rt_sec_tol_to_slice_spectrum
         )
-        rt_slice = slice(peptide_info['rt']-0.5,peptide_info['rt']+0.5)
 
-        spec_df = self.ms_data[
+        spec_df = self.tims_data[
             rt_slice, im_slice
         ]
-        return spec_df[
-            (spec_df.quad_low_mz_values <= peptide_info['mz'])
-            &(spec_df.quad_high_mz_values >= peptide_info['mz'])
+        spec_df = spec_df[
+            (spec_df.quad_low_mz_values <= peptide_info['precursor_mz'].values[0])
+            &(spec_df.quad_high_mz_values >= peptide_info['precursor_mz'].values[0])
         ].reset_index(drop=True)
 
+        _df = spec_df
+
+        if self.find_closest_ms2_by_rt_sec:
+            min_rt_dist = 1000000
+            for _, df in spec_df.groupby('frame_indices'):
+                if abs(df.rt_values.values[0]-query_rt) < min_rt_dist:
+                    _df = df
+                    min_rt_dist = abs(df.rt_values.values[0]-query_rt)
+        return _df
+