Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Peptdeep latest #32

Merged
merged 21 commits into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions alphaviz/contrib/ms2_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,8 @@ def plot(self, plot_df, sequence, title,
def _init_plot(self, title):

self.fig = plotly.subplots.make_subplots(
rows=(
self.rows
), cols=3,
rows=self.rows,
cols=3,
shared_xaxes=True,
specs=self.specs,
vertical_spacing=self.vertical_spacing,
Expand Down Expand Up @@ -285,6 +284,7 @@ def _plot_frag_annotations(self, plot_df):
for mz, inten, ion in df[
['mz_values','intensity_values','ions']
].values:
ion = ion.lower().replace('modloss','modnl')
self.fig.add_annotation(
x=mz, y=inten+yshift,
text=ion,
Expand All @@ -294,18 +294,18 @@ def _plot_frag_annotations(self, plot_df):
col=self.col,
)

neg_ay = max_inten*0.3
pred_df = plot_df.query('intensity_values<0')
pred_df = pred_df[~pred_df.ions.isin(set(df.ions))]
for mz, inten, ion in pred_df[
['mz_values','intensity_values','ions']
].values:
ion = ion.lower().replace('modloss','modnl')
self.fig.add_annotation(
x=mz, y=inten-yshift,
text=ion,
textangle=-90,
font_size=10,
ay=inten-yshift-neg_ay,
ay=inten-yshift-max_inten*(0.28+len(ion)/60),
ayref=f'y{self.row}',
yref=f'y{self.row}',
row=self.row,
Expand Down Expand Up @@ -362,7 +362,7 @@ def plot(self,
d = (
plot_df.mz_values.max() -
plot_df.mz_values.min()
) * 2/8
) * 2/len(sequence)
aa_x_positions = np.linspace(
plot_df.mz_values.min()+d,
plot_df.mz_values.max()-d,
Expand Down
139 changes: 91 additions & 48 deletions alphaviz/contrib/ms_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,34 @@

from peptdeep.pretrained_models import ModelManager

from alpharaw.match.psm_match_alphatims import PepSpecMatch_AlphaTims

from .ms2_plot import MS2_Plot
from .xic_plot import XIC_1D_Plot
from .reader_utils import load_ms_data, load_psms
from .reader_utils import load_psms

from .peptdeep_utils import (
match_ms2, get_frag_df_from_peptide_info,
predict_one_peptide, get_peptide_info_from_dfs,
)

class MS_Viz:
min_frag_mz:float = 200.0
_min_frag_mz:float = 200.0
_labeled_sites = ['K','N-term']
remove_unlabeled_fragments = False
ms2_ppm_tol=20.0
ms1_ppm_tol=20.0
rt_sec_tol_to_slice_spectrum = 3.0
im_tol_to_slice_spectrum = 0.05
find_closest_ms2_by_rt_sec = True
def __init__(self,
model_mgr:ModelManager,
frag_types:list = ['b','y','b-modloss','y-modloss'],
):
self.model_mgr = model_mgr
self.ms_data = None
self.tims_data = None
self.pep_spec_match = None

self.psm_df = pd.DataFrame()
self.fragment_mz_df = pd.DataFrame()
self.fragment_intensity_df = pd.DataFrame()
Expand All @@ -35,15 +46,36 @@ def __init__(self,
self._frag_types, self._max_frag_charge
)

self.prediction_mode=False

self.ms2_plot = MS2_Plot()
self.xic_1d_plot = XIC_1D_Plot()

def load_ms_data(self, ms_file, dda:bool):
self.ms_data = load_ms_data(ms_file, dda=dda)
@property
def min_frag_mz(self):
return self._min_frag_mz

@min_frag_mz.setter
def min_frag_mz(self, val):
self._min_frag_mz = val
self.xic_1d_plot.min_frag_mz = val

def load_ms_data(self, ms_file, ms_file_type, dda:bool):
self.tims_match = PepSpecMatch_AlphaTims(
self.charged_frag_types, centroid_mode=True,
)
self.tims_match.load_ms_data(ms_file, ms_file_type, dda)
self.tims_data = self.tims_match.tims_data

def add_rt_im_columns_to_psm_df_if_missing(self):
self.psm_df = self.tims_match._add_missing_columns_to_psm_df(
self.psm_df
)

def load_psms(self,
psm_file, psm_type,
get_fragments=False,
psm_file:str, psm_type:str,
get_fragments:bool=False,
add_modification_mapping:dict=None,
):
(
self.psm_df, self.fragment_mz_df,
Expand All @@ -54,24 +86,27 @@ def load_psms(self,
model_mgr=self.model_mgr,
frag_types=self._frag_types,
max_frag_charge=self._max_frag_charge,
add_modification_mapping=add_modification_mapping,
)

def predict_one_peptide_info(self,
one_pept_df:pd.DataFrame
)->dict:
)->pd.DataFrame:
return predict_one_peptide(
self.model_mgr, one_pept_df,
self.ms_data.rt_max_value
self.tims_data.rt_max_value,
self.prediction_mode,
self._labeled_sites if self.remove_unlabeled_fragments else None
)

def extract_one_peptide_info(self,
one_pept_df:pd.DataFrame,
)->dict:
)->pd.DataFrame:
return get_peptide_info_from_dfs(
one_pept_df,
self.fragment_mz_df,
self.fragment_intensity_df,
self.ms_data.rt_max_value,
self.tims_data.rt_max_value,
)

def transfer_learn(self):
Expand All @@ -87,31 +122,23 @@ def transfer_learn(self):
)

def plot_elution_profile_heatmap(self,
peptide_info: dict,
mz_tol: float = 50,
rt_tol: float = 30,
im_tol: float = 0.05,
peptide_info: pd.DataFrame,
):
raise NotImplementedError('TODO for timsTOF data')

def plot_elution_profile(self,
peptide_info: dict,
mz_tol: float = 50,
rt_tol: float = 30,
im_tol: float = 0.05,
peptide_info: pd.DataFrame,
include_precursor:bool=True,
include_ms1:bool=True,
)->go.Figure:
"""Based on `alphaviz.plotting.plot_elution_profile`

Parameters
----------
peptide_info : dict
alphaviz peptide_info dict,
peptide_info : pd.DataFrame
alphaviz peptide_info,
see `self.predict_one_peptide`.

mz_tol : float, optional
in ppm, by default 50

rt_tol : float, optional
RT tol in seconds, by default 30

Expand All @@ -127,13 +154,13 @@ def plot_elution_profile(self,
plotly Figure object return by
`alphaviz.plotting.plot_elution_profile`
"""
self.xic_1d_plot.ms1_ppm_tol = self.ms1_ppm_tol
self.xic_1d_plot.ms2_ppm_tol = self.ms2_ppm_tol
return self.xic_1d_plot.plot(
self.ms_data,
self.tims_data,
peptide_info=peptide_info,
mz_tol=mz_tol,
rt_tol=rt_tol,
im_tol=im_tol,
include_precursor=include_precursor
include_precursor=include_precursor,
include_ms1=include_ms1,
)

def _add_unmatched_df(self, plot_df, spec_df):
Expand All @@ -142,11 +169,10 @@ def _add_unmatched_df(self, plot_df, spec_df):
return pd.concat([spec_df, plot_df], ignore_index=True)

def plot_mirror_ms2(self,
peptide_info:dict,
peptide_info:pd.DataFrame,
frag_df:pd.DataFrame=None,
spec_df:pd.DataFrame=None,
title:str="",
mz_tol:float=50,
matching_mode:str="centroid",
plot_unmatched_peaks:bool=False,
)->go.Figure:
Expand All @@ -156,8 +182,8 @@ def plot_mirror_ms2(self,
Parameters
----------

peptide_info : dict
peptide_info dict in alphaviz format
peptide_info : pd.DataFrame
peptide_info in alphaviz format

frag_df : pd.DataFrame, optional
Fragment DF
Expand All @@ -166,9 +192,6 @@ def plot_mirror_ms2(self,
AlphaTims sliced DataFrame for raw data,
by default None

mz_tol : float, optional
in ppm, by default 50

matching_mode : str, optional
peak matching mode, by default "centroid"

Expand All @@ -187,45 +210,65 @@ def plot_mirror_ms2(self,

frag_df = frag_df[
frag_df.mz_values>=max(
spec_df.mz_values.min()-0.1, self.min_frag_mz
spec_df.mz_values.min()-0.1, self._min_frag_mz
)
]

spec_df['intensity_values'] = spec_df.intensity_values.astype(float)
plot_df, pcc, spc = match_ms2(
spec_df=spec_df, frag_df=frag_df,
mz_tol=mz_tol,
mz_tol=self.ms2_ppm_tol,
matching_mode=matching_mode,
)

peptide_info['pcc'] = pcc
peptide_info['spc'] = spc

if plot_unmatched_peaks:
plot_df = self._add_unmatched_df(
plot_df, spec_df
)

if not title:
title = f"{peptide_info['mod_seq_charge']} PCC={pcc:.3f}"
title = f"{peptide_info['mod_seq_charge'].values[0]} PCC={pcc:.3f}"

plot_df = plot_df.query('intensity_values!=0')

return self.ms2_plot.plot(
plot_df,
title=title,
sequence=peptide_info['sequence'],
sequence=peptide_info['sequence'].values[0],
plot_unmatched_peaks=plot_unmatched_peaks,
)

def get_ms2_spec_df(self, peptide_info)->pd.DataFrame:
im_slice = (
slice(None) if peptide_info['im'] == 0 else
slice(peptide_info['im']-0.05,peptide_info['im']+0.05)
slice(None) if peptide_info['im'].values[0] == 0 else
slice(
peptide_info['im'].values[0]-self.im_tol_to_slice_spectrum,
peptide_info['im'].values[0]+self.im_tol_to_slice_spectrum
)
)
query_rt = peptide_info['rt_sec'].values[0]
rt_slice = slice(
query_rt-self.rt_sec_tol_to_slice_spectrum,
query_rt+self.rt_sec_tol_to_slice_spectrum
)
rt_slice = slice(peptide_info['rt']-0.5,peptide_info['rt']+0.5)

spec_df = self.ms_data[
spec_df = self.tims_data[
rt_slice, im_slice
]
return spec_df[
(spec_df.quad_low_mz_values <= peptide_info['mz'])
&(spec_df.quad_high_mz_values >= peptide_info['mz'])
spec_df = spec_df[
(spec_df.quad_low_mz_values <= peptide_info['precursor_mz'].values[0])
&(spec_df.quad_high_mz_values >= peptide_info['precursor_mz'].values[0])
].reset_index(drop=True)

_df = spec_df

if self.find_closest_ms2_by_rt_sec:
min_rt_dist = 1000000
for _, df in spec_df.groupby('frame_indices'):
if abs(df.rt_values.values[0]-query_rt) < min_rt_dist:
_df = df
min_rt_dist = abs(df.rt_values.values[0]-query_rt)
return _df

Loading