Skip to content

Commit

Permalink
Merge pull request #32 from jalew188/peptdeep_latest
Browse files Browse the repository at this point in the history
Peptdeep latest
  • Loading branch information
EugeniaVoytik authored Oct 28, 2022
2 parents cb10e38 + 85ed0dc commit a196076
Show file tree
Hide file tree
Showing 11 changed files with 805 additions and 279 deletions.
12 changes: 6 additions & 6 deletions alphaviz/contrib/ms2_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,8 @@ def plot(self, plot_df, sequence, title,
def _init_plot(self, title):

self.fig = plotly.subplots.make_subplots(
rows=(
self.rows
), cols=3,
rows=self.rows,
cols=3,
shared_xaxes=True,
specs=self.specs,
vertical_spacing=self.vertical_spacing,
Expand Down Expand Up @@ -285,6 +284,7 @@ def _plot_frag_annotations(self, plot_df):
for mz, inten, ion in df[
['mz_values','intensity_values','ions']
].values:
ion = ion.lower().replace('modloss','modnl')
self.fig.add_annotation(
x=mz, y=inten+yshift,
text=ion,
Expand All @@ -294,18 +294,18 @@ def _plot_frag_annotations(self, plot_df):
col=self.col,
)

neg_ay = max_inten*0.3
pred_df = plot_df.query('intensity_values<0')
pred_df = pred_df[~pred_df.ions.isin(set(df.ions))]
for mz, inten, ion in pred_df[
['mz_values','intensity_values','ions']
].values:
ion = ion.lower().replace('modloss','modnl')
self.fig.add_annotation(
x=mz, y=inten-yshift,
text=ion,
textangle=-90,
font_size=10,
ay=inten-yshift-neg_ay,
ay=inten-yshift-max_inten*(0.28+len(ion)/60),
ayref=f'y{self.row}',
yref=f'y{self.row}',
row=self.row,
Expand Down Expand Up @@ -362,7 +362,7 @@ def plot(self,
d = (
plot_df.mz_values.max() -
plot_df.mz_values.min()
) * 2/8
) * 2/len(sequence)
aa_x_positions = np.linspace(
plot_df.mz_values.min()+d,
plot_df.mz_values.max()-d,
Expand Down
139 changes: 91 additions & 48 deletions alphaviz/contrib/ms_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,34 @@

from peptdeep.pretrained_models import ModelManager

from alpharaw.match.psm_match_alphatims import PepSpecMatch_AlphaTims

from .ms2_plot import MS2_Plot
from .xic_plot import XIC_1D_Plot
from .reader_utils import load_ms_data, load_psms
from .reader_utils import load_psms

from .peptdeep_utils import (
match_ms2, get_frag_df_from_peptide_info,
predict_one_peptide, get_peptide_info_from_dfs,
)

class MS_Viz:
min_frag_mz:float = 200.0
_min_frag_mz:float = 200.0
_labeled_sites = ['K','N-term']
remove_unlabeled_fragments = False
ms2_ppm_tol=20.0
ms1_ppm_tol=20.0
rt_sec_tol_to_slice_spectrum = 3.0
im_tol_to_slice_spectrum = 0.05
find_closest_ms2_by_rt_sec = True
def __init__(self,
model_mgr:ModelManager,
frag_types:list = ['b','y','b-modloss','y-modloss'],
):
self.model_mgr = model_mgr
self.ms_data = None
self.tims_data = None
self.pep_spec_match = None

self.psm_df = pd.DataFrame()
self.fragment_mz_df = pd.DataFrame()
self.fragment_intensity_df = pd.DataFrame()
Expand All @@ -35,15 +46,36 @@ def __init__(self,
self._frag_types, self._max_frag_charge
)

self.prediction_mode=False

self.ms2_plot = MS2_Plot()
self.xic_1d_plot = XIC_1D_Plot()

def load_ms_data(self, ms_file, dda:bool):
self.ms_data = load_ms_data(ms_file, dda=dda)
@property
def min_frag_mz(self):
    """Return the lower fragment m/z bound stored in ``self._min_frag_mz``."""
    return self._min_frag_mz

@min_frag_mz.setter
def min_frag_mz(self, val):
    """Store a new lower fragment m/z bound and mirror it on the XIC plot.

    Parameters
    ----------
    val
        New bound. It is written both to ``self._min_frag_mz`` and to
        ``self.xic_1d_plot.min_frag_mz`` so the two stay in sync.
    """
    self._min_frag_mz = val
    # Keep the XIC plot helper on the same threshold as this object.
    self.xic_1d_plot.min_frag_mz = val

def load_ms_data(self, ms_file, ms_file_type, dda:bool):
    """Create a ``PepSpecMatch_AlphaTims`` matcher and load raw data with it.

    Parameters
    ----------
    ms_file
        Forwarded unchanged to ``PepSpecMatch_AlphaTims.load_ms_data``.
    ms_file_type
        Forwarded unchanged to ``PepSpecMatch_AlphaTims.load_ms_data``.
    dda : bool
        Forwarded unchanged to ``PepSpecMatch_AlphaTims.load_ms_data``
        (presumably selects DDA vs. DIA handling -- TODO confirm).

    Side effects
    ------------
    Sets ``self.tims_match`` to the new matcher and ``self.tims_data`` to
    the matcher's ``tims_data`` attribute.
    """
    # The matcher is built in centroid mode from this object's fragment types.
    self.tims_match = PepSpecMatch_AlphaTims(
        self.charged_frag_types, centroid_mode=True,
    )
    self.tims_match.load_ms_data(ms_file, ms_file_type, dda)
    # Expose the loaded data directly; other methods read self.tims_data.
    self.tims_data = self.tims_match.tims_data

def add_rt_im_columns_to_psm_df_if_missing(self):
    """Replace ``self.psm_df`` with the matcher's column-completed version.

    ``self.psm_df`` is passed to
    ``self.tims_match._add_missing_columns_to_psm_df`` and the returned
    object is stored back on ``self.psm_df``.

    NOTE(review): ``self.tims_match`` is assigned in ``load_ms_data``;
    presumably that must be called first -- confirm against callers.
    """
    self.psm_df = self.tims_match._add_missing_columns_to_psm_df(
        self.psm_df
    )

def load_psms(self,
psm_file, psm_type,
get_fragments=False,
psm_file:str, psm_type:str,
get_fragments:bool=False,
add_modification_mapping:dict=None,
):
(
self.psm_df, self.fragment_mz_df,
Expand All @@ -54,24 +86,27 @@ def load_psms(self,
model_mgr=self.model_mgr,
frag_types=self._frag_types,
max_frag_charge=self._max_frag_charge,
add_modification_mapping=add_modification_mapping,
)

def predict_one_peptide_info(self,
    one_pept_df:pd.DataFrame
)->pd.DataFrame:
    """Run ``predict_one_peptide`` for a single-peptide DataFrame.

    Parameters
    ----------
    one_pept_df : pd.DataFrame
        Passed through unchanged to ``predict_one_peptide``.

    Returns
    -------
    pd.DataFrame
        Whatever ``predict_one_peptide`` returns.
    """
    # Labeled sites are only forwarded when unlabeled fragments should be
    # removed; otherwise the helper receives None.
    labeled_sites = (
        self._labeled_sites if self.remove_unlabeled_fragments else None
    )
    return predict_one_peptide(
        self.model_mgr, one_pept_df,
        self.tims_data.rt_max_value,
        self.prediction_mode,
        labeled_sites,
    )

def extract_one_peptide_info(self,
    one_pept_df:pd.DataFrame,
)->pd.DataFrame:
    """Build peptide info from the already loaded fragment DataFrames.

    Parameters
    ----------
    one_pept_df : pd.DataFrame
        Passed through unchanged to ``get_peptide_info_from_dfs``.

    Returns
    -------
    pd.DataFrame
        Whatever ``get_peptide_info_from_dfs`` returns for ``one_pept_df``,
        ``self.fragment_mz_df``, ``self.fragment_intensity_df`` and
        ``self.tims_data.rt_max_value``.
    """
    return get_peptide_info_from_dfs(
        one_pept_df,
        self.fragment_mz_df,
        self.fragment_intensity_df,
        self.tims_data.rt_max_value,
    )

def transfer_learn(self):
Expand All @@ -87,31 +122,23 @@ def transfer_learn(self):
)

def plot_elution_profile_heatmap(self,
    peptide_info: pd.DataFrame,
):
    """Placeholder for a heatmap elution-profile plot.

    Parameters
    ----------
    peptide_info : pd.DataFrame
        Currently unused.

    Raises
    ------
    NotImplementedError
        Always; the method has no implementation yet.
    """
    raise NotImplementedError('TODO for timsTOF data')

def plot_elution_profile(self,
peptide_info: dict,
mz_tol: float = 50,
rt_tol: float = 30,
im_tol: float = 0.05,
peptide_info: pd.DataFrame,
include_precursor:bool=True,
include_ms1:bool=True,
)->go.Figure:
"""Based on `alphaviz.plotting.plot_elution_profile`
Parameters
----------
peptide_info : dict
alphaviz peptide_info dict,
peptide_info : pd.DataFrame
alphaviz peptide_info,
see `self.predict_one_peptide`.
mz_tol : float, optional
in ppm, by default 50
rt_tol : float, optional
RT tol in seconds, by default 30
Expand All @@ -127,13 +154,13 @@ def plot_elution_profile(self,
plotly Figure object returned by
`alphaviz.plotting.plot_elution_profile`
"""
self.xic_1d_plot.ms1_ppm_tol = self.ms1_ppm_tol
self.xic_1d_plot.ms2_ppm_tol = self.ms2_ppm_tol
return self.xic_1d_plot.plot(
self.ms_data,
self.tims_data,
peptide_info=peptide_info,
mz_tol=mz_tol,
rt_tol=rt_tol,
im_tol=im_tol,
include_precursor=include_precursor
include_precursor=include_precursor,
include_ms1=include_ms1,
)

def _add_unmatched_df(self, plot_df, spec_df):
Expand All @@ -142,11 +169,10 @@ def _add_unmatched_df(self, plot_df, spec_df):
return pd.concat([spec_df, plot_df], ignore_index=True)

def plot_mirror_ms2(self,
peptide_info:dict,
peptide_info:pd.DataFrame,
frag_df:pd.DataFrame=None,
spec_df:pd.DataFrame=None,
title:str="",
mz_tol:float=50,
matching_mode:str="centroid",
plot_unmatched_peaks:bool=False,
)->go.Figure:
Expand All @@ -156,8 +182,8 @@ def plot_mirror_ms2(self,
Parameters
----------
peptide_info : dict
peptide_info dict in alphaviz format
peptide_info : pd.DataFrame
peptide_info in alphaviz format
frag_df : pd.DataFrame, optional
Fragment DF
Expand All @@ -166,9 +192,6 @@ def plot_mirror_ms2(self,
AlphaTims sliced DataFrame for raw data,
by default None
mz_tol : float, optional
in ppm, by default 50
matching_mode : str, optional
peak matching mode, by default "centroid"
Expand All @@ -187,45 +210,65 @@ def plot_mirror_ms2(self,

frag_df = frag_df[
frag_df.mz_values>=max(
spec_df.mz_values.min()-0.1, self.min_frag_mz
spec_df.mz_values.min()-0.1, self._min_frag_mz
)
]

spec_df['intensity_values'] = spec_df.intensity_values.astype(float)
plot_df, pcc, spc = match_ms2(
spec_df=spec_df, frag_df=frag_df,
mz_tol=mz_tol,
mz_tol=self.ms2_ppm_tol,
matching_mode=matching_mode,
)

peptide_info['pcc'] = pcc
peptide_info['spc'] = spc

if plot_unmatched_peaks:
plot_df = self._add_unmatched_df(
plot_df, spec_df
)

if not title:
title = f"{peptide_info['mod_seq_charge']} PCC={pcc:.3f}"
title = f"{peptide_info['mod_seq_charge'].values[0]} PCC={pcc:.3f}"

plot_df = plot_df.query('intensity_values!=0')

return self.ms2_plot.plot(
plot_df,
title=title,
sequence=peptide_info['sequence'],
sequence=peptide_info['sequence'].values[0],
plot_unmatched_peaks=plot_unmatched_peaks,
)

def get_ms2_spec_df(self, peptide_info)->pd.DataFrame:
    """Slice the raw data around one peptide and return matching peaks.

    Parameters
    ----------
    peptide_info : pd.DataFrame
        Only the first row is read, through the columns ``im``,
        ``rt_sec`` and ``precursor_mz``.

    Returns
    -------
    pd.DataFrame
        Rows of ``self.tims_data[rt_slice, im_slice]`` whose
        ``quad_low_mz_values``..``quad_high_mz_values`` range contains the
        precursor m/z. If ``self.find_closest_ms2_by_rt_sec`` is truthy,
        only the rows of the single ``frame_indices`` group whose first
        ``rt_values`` entry is closest to the query RT are returned (that
        group keeps its index labels from the filtered frame); otherwise
        the whole filtered frame with a fresh 0..n-1 index is returned.
    """
    query_im = peptide_info['im'].values[0]
    query_rt = peptide_info['rt_sec'].values[0]
    precursor_mz = peptide_info['precursor_mz'].values[0]

    # An ion mobility of exactly 0 selects the whole mobility axis.
    if query_im == 0:
        im_slice = slice(None)
    else:
        im_slice = slice(
            query_im-self.im_tol_to_slice_spectrum,
            query_im+self.im_tol_to_slice_spectrum
        )
    rt_slice = slice(
        query_rt-self.rt_sec_tol_to_slice_spectrum,
        query_rt+self.rt_sec_tol_to_slice_spectrum
    )

    spec_df = self.tims_data[rt_slice, im_slice]
    # Keep only peaks whose quadrupole window covers the precursor m/z.
    spec_df = spec_df[
        (spec_df.quad_low_mz_values <= precursor_mz)
        &(spec_df.quad_high_mz_values >= precursor_mz)
    ].reset_index(drop=True)

    if not self.find_closest_ms2_by_rt_sec:
        return spec_df

    # Pick the frame nearest in RT; on ties the first group wins, and an
    # empty slice falls through to the (empty) filtered frame.
    closest_df = spec_df
    min_rt_dist = float('inf')
    for _, frame_df in spec_df.groupby('frame_indices'):
        rt_dist = abs(frame_df.rt_values.values[0]-query_rt)
        if rt_dist < min_rt_dist:
            closest_df = frame_df
            min_rt_dist = rt_dist
    return closest_df

Loading

0 comments on commit a196076

Please sign in to comment.