diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 5715699..edb6615 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -31,6 +31,6 @@ from .dashboard import DrugScreenDashboard -__version__ = "0.4.7" +__version__ = "0.4.8" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index d925203..b268d04 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -16,6 +16,7 @@ from ..phenoscore import runPhenoScore, runPhenoScoreForReplicate from ..preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth from ..phenoscore.annotate import annotateScoreTable, hit_dict +from ..plotting import volcano_plot, label_resistance_hit, label_sensitivity_hit import warnings from copy import copy @@ -251,34 +252,6 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ **kwargs ) self._add_phenotype_results(run_name, f'rho:{rho_name}', rho) - - # gnerate replicate level phenotype scores - pdata_dict = {} - for score_name in self.phenotypes[score_level]['results'].keys(): - score_label, comparison = score_name.split(':') - y_label, x_label = comparison.split('_vs_') - - #TODO: get growth rates for replicate level scores - - - pdata_dict.update({ - score_name: runPhenoScoreForReplicate( - self.adata, x_label = x_label, y_label = y_label, - transformation=self.fc_transformation, - # growth_factor_reps= - # **kwargs - ).add_prefix(f'{score_label}_').T # transpose to match pdata format - }) - - pdata_df = pd.concat(pdata_dict, axis=0) - - #TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate` to maintain same format - # add .pdata - self.pdata = ad.AnnData( - X = pdata_df, - # obs = growth_factor_table.loc[pdata_df.index,:], - var=self.adata.var - ) def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs): """ @@ -328,20 +301,24 @@ def listPhenotypeScores(self, run_name='auto'): return out - def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label='negative_control', target_col='target',pvalue_col='ttest pvalue', score_col='score'): - """ - Get phenotype scores for a given score level - - Args: - score_name (str): name of the score to retrieve, e.g. 'gamma', 'tau', 'rho', 'delta' - threshold (float): threshold for filtering significant hits, default is 5 - run_name (str): name of the phenotype calculation run to retrieve - ctrl_label (str): label for the negative control, default is 'negative_control' - target_col (str): column name for the target gene, default is 'target' - pvalue_column (str): column name for the p-value, default is 'ttest pvalue' - score_column (str): column name for the score, default is 'score' - """ - + def drawVolcano( + self, ax, + phenotype_name, + threshold, + dot_size=1, + run_name='auto', + score_col='score', + pvalue_col='pvalue', + xlabel='auto', + ylabel='-log10(pvalue)', + xlims='auto', + ylims='auto', + ctrl_label='negative_control', + resistance_hits=None, + sensitivity_hits=None, + size_txt=None, + **args + ): if run_name == 'auto': if len(list(self.phenotypes.keys())) == 1: run_name = list(self.phenotypes.keys())[0] @@ -352,88 +329,56 @@ def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label= '' + ', '.join(self.phenotypes.keys()) ) - if score_name not in self.phenotype_names: - raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names") + score_tag, _ = phenotype_name.split(':') - keep_col = [target_col, score_col, pvalue_col] - score_tag = score_name.split(':')[0] - out = annotateScoreTable( - self.phenotypes[run_name][score_name].loc[:,keep_col], - threshold=threshold, - up_hit=hit_dict[score_tag]['up_hit'], + df = self.phenotypes[run_name]['results'][phenotype_name] + + df = annotateScoreTable( + df, + up_hit=hit_dict[score_tag]['up_hit'], down_hit=hit_dict[score_tag]['down_hit'], - ctrl_label=ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col + score_col=score_col, pvalue_col=pvalue_col, + ctrl_label=ctrl_label, + threshold=threshold, ) - return out - - def getAnnotatedTable(self, threshold, run_name='auto', ctrl_label='negative_control', target_col='target', pvalue_col='ttest pvalue', score_col='score'): - """ - Returns an annotated table with scores, labels, and replicate phenotypes. - - Args: - threshold (int, optional): The threshold value for determining hits. Defaults to 5. - run_name (str, optional): The name of the phenotype calculation run. Defaults to 'auto'. - ctrl_label (str, optional): The label for the control group. Defaults to 'negative_control'. - target_col (str, optional): The column name for the target. Defaults to 'target'. - pvalue_column (str, optional): The column name for the p-value. Defaults to 'ttest pvalue'. - score_column (str, optional): The column name for the score. Defaults to 'score'. + df['-log10(pvalue)'] = -np.log10(df[pvalue_col]) - Returns: - pandas.DataFrame: An annotated table with scores, labels, and replicate phenotypes. - """ - if run_name == 'auto': - if len(list(self.phenotypes.keys())) == 1: - run_name = list(self.phenotypes.keys())[0] - else: - raise ValueError( - 'Multiple phenotype calculation runs found.' - 'Please specify run_name. Available runs: ' - '' + ', '.join(self.phenotypes.keys()) + if xlabel == 'auto': + xlabel = phenotype_name.replace(':', ': ').replace('_', ' ') + + volcano_plot(ax, df, + up_hit=hit_dict[score_tag]['up_hit'], + down_hit=hit_dict[score_tag]['down_hit'], + score_col=score_col, pvalue_col=pvalue_col, + xlabel=xlabel, ylabel=ylabel, + dot_size=dot_size, xlims=xlims, ylims=ylims, + ctrl_label=ctrl_label, + **args) + + if resistance_hits != None: + if type(resistance_hits) != list: resistance_hits = [resistance_hits] + for hit in resistance_hits: + label_resistance_hit( + ax=ax, df_in=df, label=hit, + x_col=score_col, + y_col='-log10(pvalue)', + size=dot_size * 2, + size_txt=size_txt ) - - keep_col = [target_col, score_col, pvalue_col] - # self.phenotypes[run_name] = pd.concat({ - # f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho - # }, axis=1) - - score_names = set(self.phenotypes[run_name]['results'].keys()) - sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list() - - df_list = {} - for score_name in score_names: - score_tag = score_name.split(':')[0] - - # get annotated table - df_ann = annotateScoreTable( - self.phenotypes[run_name][score_name].loc[:,keep_col], - up_hit=hit_dict[score_tag]['up_hit'], - down_hit=hit_dict[score_tag]['down_hit'], - score_col=score_col, - pvalue_col=pvalue_col, - ctrl_label=ctrl_label, - threshold=threshold + if sensitivity_hits != None: + if type(sensitivity_hits) != list: sensitivity_hits = [sensitivity_hits] + for hit in sensitivity_hits: + label_sensitivity_hit( + ax=ax, df_in=df, label=hit, + x_col=score_col, + y_col='-log10(pvalue)', + pvalue_col=pvalue_col, + size=dot_size * 2, + size_txt=size_txt ) - # get replicate phe - df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T - - # make table - df = pd.concat([ - df_ann.drop(columns=['label']), - df_phe_reps, - df_ann['label'] - ],axis=1).loc[sort_var,:] - - df_list.update({score_name:df}) - - out = pd.concat(df_list,axis=1) - - return out - class GImaps(object): - pass \ No newline at end of file + pass diff --git a/screenpro/phenoscore/annotate.py b/screenpro/phenoscore/annotate.py index bc8277d..90eb96c 100644 --- a/screenpro/phenoscore/annotate.py +++ b/screenpro/phenoscore/annotate.py @@ -50,7 +50,7 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='con return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col]) -def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='control'): +def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'): """ Annotate the given score tabel diff --git a/screenpro/plotting/__init__.py b/screenpro/plotting/__init__.py index b8cb64c..2ec6344 100644 --- a/screenpro/plotting/__init__.py +++ b/screenpro/plotting/__init__.py @@ -6,5 +6,4 @@ import numpy as np import scanpy as sc from .qc_plots import plotReplicateScatter, plotCountDistribution -from .pheno_plots import volcano_plot, label_by_color -from .pheno_plots import DrugScreenPlotter \ No newline at end of file +from .pheno_plots import volcano_plot, label_by_color, label_resistance_hit, label_sensitivity_hit diff --git a/screenpro/plotting/pheno_plots.py b/screenpro/plotting/pheno_plots.py index 585bad6..6cc1521 100644 --- a/screenpro/plotting/pheno_plots.py +++ b/screenpro/plotting/pheno_plots.py @@ -67,6 +67,8 @@ def label_by_color(ax, df_in, label, textcolor='black', t_x=.5, t_y=-0.1, **args): + if size_txt == 'auto': size_txt = size * 2 + df = df_in.copy() target_data = df[df['target'] == label] @@ -81,8 +83,6 @@ def label_by_color(ax, df_in, label, if size_txt == None: pass - elif size_txt == 'auto': - size_txt = size * 2 else: # Annotate the points for i, _ in enumerate(target_data['target']): @@ -91,222 +91,106 @@ def label_by_color(ax, df_in, label, color=textcolor, size=size_txt) -class DrugScreenPlotter: - def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): - self.screen = screen - self.threshold = threshold - self.ctrl_label = ctrl_label - self.run_name = run_name - self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' - self.rho_score_name = f'rho:{treated}_vs_{untreated}' - self.tau_score_name = f'tau:{treated}_vs_{t0}' - - def _prep_data(self, score_col='score', pvalue_col='pvalue'): - - gamma = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.gamma_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - - gamma[f'-log10({pvalue_col})'] = np.log10(gamma[pvalue_col]) * -1 - - tau = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.tau_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - tau[f'-log10({pvalue_col})'] = np.log10(tau[pvalue_col]) * -1 - - rho = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.rho_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - rho[f'-log10({pvalue_col})'] = np.log10(rho[pvalue_col]) * -1 - - return gamma, tau, rho +def label_as_black(ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='black', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + +def label_sensitivity_hit(ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#3182bd', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + +def label_resistance_hit(ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#de2d26', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + +# def drawRhoGammaScatter( +# self, ax, +# rho_df=None, gamma_df=None, +# dot_size=1, +# score_col='score', +# xlabel='auto', +# ylabel='auto', +# xlims='auto', +# ylims='auto', +# **args +# ): +# #TODO: fix by making a single dataframe with both rho and gamma scores +# if rho_df is None: +# _, _, rho_df = self._prep_data() +# if gamma_df is None: +# gamma_df, _, _ = self._prep_data() + +# if xlabel == 'auto': +# xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') +# if ylabel == 'auto': +# ylabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - def drawVolcanoRho( - self, ax, - rho_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if rho_df is None: - _, _, rho_df = self._prep_data() - if xlabel == 'auto': - xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - - volcano_plot(ax, rho_df, - up_hit='resistance_hit', down_hit='sensitivity_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - ctrl_label=self.ctrl_label, - **args) +# # color by rho score labels +# up_hit = 'resistance_hit' +# down_hit = 'sensitivity_hit' + +# # Scatter plot for each category +# ax.scatter( rho_df.loc[rho_df['label'] == 'target_non_hit', score_col], +# gamma_df.loc[rho_df['label'] == 'target_non_hit', score_col], +# alpha=0.1, s=dot_size, c='black', label='target_non_hit', +# **args) - def drawVolcanoGamma( - self, ax, - gamma_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if gamma_df is None: - gamma_df, _, _ = self._prep_data() - if xlabel == 'auto': - xlabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - - volcano_plot(ax, gamma_df, - up_hit='up_hit', down_hit='essential_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - ctrl_label=self.ctrl_label, - **args) - - def drawVolcanoTau( - self, ax, - tau_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if tau_df is None: - _, tau_df, _, = self._prep_data() - if xlabel == 'auto': - xlabel = self.tau_score_name.replace(':', ': ').replace('_', ' ') - - volcano_plot(ax, tau_df, - up_hit='up_hit', down_hit='down_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - ctrl_label=self.ctrl_label, - **args) - - def drawRhoGammaScatter( - self, ax, - rho_df=None, gamma_df=None, - dot_size=1, - score_col='score', - xlabel='auto', - ylabel='auto', - xlims='auto', - ylims='auto', - **args - ): - #TODO: fix by making a single dataframe with both rho and gamma scores - if rho_df is None: - _, _, rho_df = self._prep_data() - if gamma_df is None: - gamma_df, _, _ = self._prep_data() - - if xlabel == 'auto': - xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - if ylabel == 'auto': - ylabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - - # color by rho score labels - up_hit = 'resistance_hit' - down_hit = 'sensitivity_hit' - - # Scatter plot for each category - ax.scatter( rho_df.loc[rho_df['label'] == 'target_non_hit', score_col], - gamma_df.loc[rho_df['label'] == 'target_non_hit', score_col], - alpha=0.1, s=dot_size, c='black', label='target_non_hit', - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == up_hit, score_col], - gamma_df.loc[rho_df['label'] == up_hit, score_col], - alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == down_hit, score_col], - gamma_df.loc[rho_df['label'] == down_hit, score_col], - alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == self.ctrl_label, score_col], - gamma_df.loc[rho_df['label'] == self.ctrl_label, score_col], - alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, - **args) - - # Set x-axis and y-axis labels - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) +# ax.scatter( rho_df.loc[rho_df['label'] == up_hit, score_col], +# gamma_df.loc[rho_df['label'] == up_hit, score_col], +# alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, +# **args) + +# ax.scatter( rho_df.loc[rho_df['label'] == down_hit, score_col], +# gamma_df.loc[rho_df['label'] == down_hit, score_col], +# alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, +# **args) + +# ax.scatter( rho_df.loc[rho_df['label'] == self.ctrl_label, score_col], +# gamma_df.loc[rho_df['label'] == self.ctrl_label, score_col], +# alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, +# **args) + +# # Set x-axis and y-axis labels +# ax.set_xlabel(xlabel) +# ax.set_ylabel(ylabel) - # Set x-axis limits - ax.set_xlim(xlims) - ax.set_ylim(ylims) +# # Set x-axis limits +# ax.set_xlim(xlims) +# ax.set_ylim(ylims) - # Add legend - ax.legend() - - def label_as_black(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - label_by_color( - ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='black', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) - - def label_sensitivity_hit(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - label_by_color( - ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#3182bd', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) +# # Add legend +# ax.legend() - def label_resistance_hit(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - label_by_color( - ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#de2d26', - textcolor='black', - t_x=t_x, t_y=t_y, - **args)