Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor code improvements #86

Merged
merged 13 commits into from
Aug 2, 2024
2 changes: 1 addition & 1 deletion screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
from .dashboard import DrugScreenDashboard


__version__ = "0.4.7"
__version__ = "0.4.8"
__author__ = "Abe Arab"
__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"
177 changes: 61 additions & 116 deletions screenpro/assays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ..phenoscore import runPhenoScore, runPhenoScoreForReplicate
from ..preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth
from ..phenoscore.annotate import annotateScoreTable, hit_dict
from ..plotting import volcano_plot, label_resistance_hit, label_sensitivity_hit

import warnings
from copy import copy
Expand Down Expand Up @@ -251,34 +252,6 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_
**kwargs
)
self._add_phenotype_results(run_name, f'rho:{rho_name}', rho)

# generate replicate level phenotype scores
pdata_dict = {}
for score_name in self.phenotypes[score_level]['results'].keys():
score_label, comparison = score_name.split(':')
y_label, x_label = comparison.split('_vs_')

#TODO: get growth rates for replicate level scores


pdata_dict.update({
score_name: runPhenoScoreForReplicate(
self.adata, x_label = x_label, y_label = y_label,
transformation=self.fc_transformation,
# growth_factor_reps=
# **kwargs
).add_prefix(f'{score_label}_').T # transpose to match pdata format
})

pdata_df = pd.concat(pdata_dict, axis=0)

#TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate` to maintain same format
# add .pdata
self.pdata = ad.AnnData(
X = pdata_df,
# obs = growth_factor_table.loc[pdata_df.index,:],
var=self.adata.var
)

def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs):
"""
Expand Down Expand Up @@ -328,20 +301,24 @@ def listPhenotypeScores(self, run_name='auto'):

return out

def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label='negative_control', target_col='target',pvalue_col='ttest pvalue', score_col='score'):
"""
Get phenotype scores for a given score level

Args:
score_name (str): name of the score to retrieve, e.g. 'gamma', 'tau', 'rho', 'delta'
threshold (float): threshold for filtering significant hits (required; no default)
run_name (str): name of the phenotype calculation run to retrieve
ctrl_label (str): label for the negative control, default is 'negative_control'
target_col (str): column name for the target gene, default is 'target'
pvalue_col (str): column name for the p-value, default is 'ttest pvalue'
score_col (str): column name for the score, default is 'score'
"""

def drawVolcano(
self, ax,
phenotype_name,
threshold,
dot_size=1,
run_name='auto',
score_col='score',
pvalue_col='pvalue',
xlabel='auto',
ylabel='-log10(pvalue)',
xlims='auto',
ylims='auto',
ctrl_label='negative_control',
resistance_hits=None,
sensitivity_hits=None,
size_txt=None,
**args
):
if run_name == 'auto':
if len(list(self.phenotypes.keys())) == 1:
run_name = list(self.phenotypes.keys())[0]
Expand All @@ -352,88 +329,56 @@ def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label=
'' + ', '.join(self.phenotypes.keys())
)

if score_name not in self.phenotype_names:
raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names")
score_tag, _ = phenotype_name.split(':')

keep_col = [target_col, score_col, pvalue_col]
score_tag = score_name.split(':')[0]
out = annotateScoreTable(
self.phenotypes[run_name][score_name].loc[:,keep_col],
threshold=threshold,
up_hit=hit_dict[score_tag]['up_hit'],
df = self.phenotypes[run_name]['results'][phenotype_name]

df = annotateScoreTable(
df,
up_hit=hit_dict[score_tag]['up_hit'],
down_hit=hit_dict[score_tag]['down_hit'],
ctrl_label=ctrl_label,
score_col=score_col,
pvalue_col=pvalue_col
score_col=score_col, pvalue_col=pvalue_col,
ctrl_label=ctrl_label,
threshold=threshold,
)

return out

def getAnnotatedTable(self, threshold, run_name='auto', ctrl_label='negative_control', target_col='target', pvalue_col='ttest pvalue', score_col='score'):
"""
Returns an annotated table with scores, labels, and replicate phenotypes.

Args:
threshold (int): The threshold value for determining hits. Required; no default.
run_name (str, optional): The name of the phenotype calculation run. Defaults to 'auto'.
ctrl_label (str, optional): The label for the control group. Defaults to 'negative_control'.
target_col (str, optional): The column name for the target. Defaults to 'target'.
pvalue_col (str, optional): The column name for the p-value. Defaults to 'ttest pvalue'.
score_col (str, optional): The column name for the score. Defaults to 'score'.
df['-log10(pvalue)'] = -np.log10(df[pvalue_col])

Returns:
pandas.DataFrame: An annotated table with scores, labels, and replicate phenotypes.
"""
if run_name == 'auto':
if len(list(self.phenotypes.keys())) == 1:
run_name = list(self.phenotypes.keys())[0]
else:
raise ValueError(
'Multiple phenotype calculation runs found.'
'Please specify run_name. Available runs: '
'' + ', '.join(self.phenotypes.keys())
if xlabel == 'auto':
xlabel = phenotype_name.replace(':', ': ').replace('_', ' ')

volcano_plot(ax, df,
up_hit=hit_dict[score_tag]['up_hit'],
down_hit=hit_dict[score_tag]['down_hit'],
score_col=score_col, pvalue_col=pvalue_col,
xlabel=xlabel, ylabel=ylabel,
dot_size=dot_size, xlims=xlims, ylims=ylims,
ctrl_label=ctrl_label,
**args)

if resistance_hits != None:
if type(resistance_hits) != list: resistance_hits = [resistance_hits]
for hit in resistance_hits:
label_resistance_hit(
ax=ax, df_in=df, label=hit,
x_col=score_col,
y_col='-log10(pvalue)',
size=dot_size * 2,
size_txt=size_txt
)

keep_col = [target_col, score_col, pvalue_col]

# self.phenotypes[run_name] = pd.concat({
# f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho
# }, axis=1)

score_names = set(self.phenotypes[run_name]['results'].keys())
sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list()

df_list = {}
for score_name in score_names:
score_tag = score_name.split(':')[0]

# get annotated table
df_ann = annotateScoreTable(
self.phenotypes[run_name][score_name].loc[:,keep_col],
up_hit=hit_dict[score_tag]['up_hit'],
down_hit=hit_dict[score_tag]['down_hit'],
score_col=score_col,
pvalue_col=pvalue_col,
ctrl_label=ctrl_label,
threshold=threshold
if sensitivity_hits != None:
if type(sensitivity_hits) != list: sensitivity_hits = [sensitivity_hits]
for hit in sensitivity_hits:
label_sensitivity_hit(
ax=ax, df_in=df, label=hit,
x_col=score_col,
y_col='-log10(pvalue)',
pvalue_col=pvalue_col,
size=dot_size * 2,
size_txt=size_txt
)

# get replicate phe
df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T

# make table
df = pd.concat([
df_ann.drop(columns=['label']),
df_phe_reps,
df_ann['label']
],axis=1).loc[sort_var,:]

df_list.update({score_name:df})

out = pd.concat(df_list,axis=1)

return out


class GImaps(object):
pass
pass
2 changes: 1 addition & 1 deletion screenpro/phenoscore/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='con
return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col])


def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='control'):
def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'):
"""
Annotate the given score table

Expand Down
3 changes: 1 addition & 2 deletions screenpro/plotting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,4 @@
import numpy as np
import scanpy as sc
from .qc_plots import plotReplicateScatter, plotCountDistribution
from .pheno_plots import volcano_plot, label_by_color
from .pheno_plots import DrugScreenPlotter
from .pheno_plots import volcano_plot, label_by_color, label_resistance_hit, label_sensitivity_hit
Loading
Loading