ArcInstitute · abearab · Aug 2, 2024 · Jul 28, 2024 · Jul 28, 2024 · Jul 29, 2024
diff --git a/screenpro/__init__.py b/screenpro/__init__.py
@@ -31,6 +31,6 @@
 from .dashboard import DrugScreenDashboard
 
 
-__version__ = "0.4.7"
+__version__ = "0.4.8"
 __author__ = "Abe Arab"
 __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"
diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py
@@ -16,6 +16,7 @@
 from ..phenoscore import runPhenoScore, runPhenoScoreForReplicate
 from ..preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth
 from ..phenoscore.annotate import annotateScoreTable, hit_dict
+from ..plotting import volcano_plot, label_resistance_hit, label_sensitivity_hit
 
 import warnings
 from copy import copy
@@ -251,34 +252,6 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_
                 **kwargs
             )
             self._add_phenotype_results(run_name, f'rho:{rho_name}', rho)
-
-        # gnerate replicate level phenotype scores
-        pdata_dict = {}
-        for score_name in self.phenotypes[score_level]['results'].keys():
-            score_label, comparison = score_name.split(':')
-            y_label, x_label = comparison.split('_vs_')
-
-            #TODO: get growth rates for replicate level scores
-
-
-            pdata_dict.update({
-                score_name: runPhenoScoreForReplicate(
-                    self.adata, x_label = x_label, y_label = y_label,
-                    transformation=self.fc_transformation, 
-                    # growth_factor_reps=
-                    # **kwargs
-                ).add_prefix(f'{score_label}_').T # transpose to match pdata format
-            })
-
-        pdata_df = pd.concat(pdata_dict, axis=0)
-
-        #TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate` to maintain same format
-        # add .pdata
-        self.pdata = ad.AnnData(
-            X = pdata_df,
-            # obs = growth_factor_table.loc[pdata_df.index,:],
-            var=self.adata.var
-        )
 
     def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs):
         """
@@ -328,20 +301,24 @@ def listPhenotypeScores(self, run_name='auto'):
 
         return out
 
-    def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label='negative_control', target_col='target',pvalue_col='ttest pvalue', score_col='score'):
-        """
-        Get phenotype scores for a given score level
-
-        Args:
-            score_name (str): name of the score to retrieve, e.g. 'gamma', 'tau', 'rho', 'delta'
-            threshold (float): threshold for filtering significant hits, default is 5
-            run_name (str): name of the phenotype calculation run to retrieve
-            ctrl_label (str): label for the negative control, default is 'negative_control'
-            target_col (str): column name for the target gene, default is 'target'
-            pvalue_column (str): column name for the p-value, default is 'ttest pvalue'
-            score_column (str): column name for the score, default is 'score'
-        """
-
+    def drawVolcano(
+            self, ax,
+            phenotype_name,
+            threshold,
+            dot_size=1,
+            run_name='auto',
+            score_col='score', 
+            pvalue_col='pvalue',
+            xlabel='auto',
+            ylabel='-log10(pvalue)',
+            xlims='auto',
+            ylims='auto',
+            ctrl_label='negative_control',
+            resistance_hits=None,
+            sensitivity_hits=None,
+            size_txt=None,
+            **args
+            ):
         if run_name == 'auto':
             if len(list(self.phenotypes.keys())) == 1:
                 run_name = list(self.phenotypes.keys())[0]
@@ -352,88 +329,56 @@ def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label=
                     '' + ', '.join(self.phenotypes.keys())
                 )
 
-        if score_name not in self.phenotype_names:
-            raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names")
+        score_tag, _ = phenotype_name.split(':')
 
-        keep_col = [target_col, score_col, pvalue_col]
-        score_tag = score_name.split(':')[0]
-        out = annotateScoreTable(
-            self.phenotypes[run_name][score_name].loc[:,keep_col],
-            threshold=threshold,
-            up_hit=hit_dict[score_tag]['up_hit'],
+        df = self.phenotypes[run_name]['results'][phenotype_name]
+
+        df = annotateScoreTable(
+            df, 
+            up_hit=hit_dict[score_tag]['up_hit'], 
             down_hit=hit_dict[score_tag]['down_hit'],
-            ctrl_label=ctrl_label, 
-            score_col=score_col,
-            pvalue_col=pvalue_col
+            score_col=score_col, pvalue_col=pvalue_col,
+            ctrl_label=ctrl_label,
+            threshold=threshold,
         )
 
-        return out
-
-    def getAnnotatedTable(self, threshold, run_name='auto', ctrl_label='negative_control', target_col='target', pvalue_col='ttest pvalue', score_col='score'):
-        """
-        Returns an annotated table with scores, labels, and replicate phenotypes.
-
-        Args:
-            threshold (int, optional): The threshold value for determining hits. Defaults to 5.
-            run_name (str, optional): The name of the phenotype calculation run. Defaults to 'auto'.
-            ctrl_label (str, optional): The label for the control group. Defaults to 'negative_control'.
-            target_col (str, optional): The column name for the target. Defaults to 'target'.
-            pvalue_column (str, optional): The column name for the p-value. Defaults to 'ttest pvalue'.
-            score_column (str, optional): The column name for the score. Defaults to 'score'.
+        df['-log10(pvalue)'] = -np.log10(df[pvalue_col])
 
-        Returns:
-            pandas.DataFrame: An annotated table with scores, labels, and replicate phenotypes.
-        """
-        if run_name == 'auto':
-            if len(list(self.phenotypes.keys())) == 1:
-                run_name = list(self.phenotypes.keys())[0]
-            else:
-                raise ValueError(
-                    'Multiple phenotype calculation runs found.'
-                    'Please specify run_name. Available runs: '
-                    '' + ', '.join(self.phenotypes.keys())
+        if xlabel == 'auto':
+            xlabel = phenotype_name.replace(':', ': ').replace('_', ' ')
+
+        volcano_plot(ax, df, 
+                      up_hit=hit_dict[score_tag]['up_hit'], 
+                      down_hit=hit_dict[score_tag]['down_hit'],
+                      score_col=score_col, pvalue_col=pvalue_col,
+                      xlabel=xlabel, ylabel=ylabel,
+                      dot_size=dot_size, xlims=xlims, ylims=ylims,
+                      ctrl_label=ctrl_label,
+                      **args)
+
+        if resistance_hits != None:
+            if type(resistance_hits) != list: resistance_hits = [resistance_hits]
+            for hit in resistance_hits:
+                label_resistance_hit(
+                    ax=ax, df_in=df, label=hit,
+                    x_col=score_col,
+                    y_col='-log10(pvalue)',
+                    size=dot_size * 2,
+                    size_txt=size_txt
                 )
-
-        keep_col = [target_col, score_col, pvalue_col]
 
-        # self.phenotypes[run_name] = pd.concat({
-        #     f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho
-        # }, axis=1)
-
-        score_names = set(self.phenotypes[run_name]['results'].keys())
-        sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list()
-
-        df_list = {}
-        for score_name in score_names:
-            score_tag = score_name.split(':')[0]
-
-            # get annotated table
-            df_ann = annotateScoreTable(
-                self.phenotypes[run_name][score_name].loc[:,keep_col],
-                up_hit=hit_dict[score_tag]['up_hit'],
-                down_hit=hit_dict[score_tag]['down_hit'],
-                score_col=score_col,
-                pvalue_col=pvalue_col,
-                ctrl_label=ctrl_label,
-                threshold=threshold
+        if sensitivity_hits != None:
+            if type(sensitivity_hits) != list: sensitivity_hits = [sensitivity_hits]
+            for hit in sensitivity_hits:
+                label_sensitivity_hit(
+                    ax=ax, df_in=df, label=hit,
+                    x_col=score_col,
+                    y_col='-log10(pvalue)',
+                    pvalue_col=pvalue_col,
+                    size=dot_size * 2,
+                    size_txt=size_txt
             )
 
-            # get replicate phe
-            df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T
-
-            # make table
-            df = pd.concat([
-                df_ann.drop(columns=['label']),
-                df_phe_reps, 
-                df_ann['label']
-            ],axis=1).loc[sort_var,:]
-
-            df_list.update({score_name:df})
-
-        out = pd.concat(df_list,axis=1)
-
-        return out
-
 
 class GImaps(object):
-    pass
+    pass
diff --git a/screenpro/phenoscore/annotate.py b/screenpro/phenoscore/annotate.py
@@ -50,7 +50,7 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='con
     return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col])
 
 
-def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='control'):
+def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'):
     """
     Annotate the given score tabel 
 

diff --git a/screenpro/plotting/__init__.py b/screenpro/plotting/__init__.py
@@ -6,5 +6,4 @@
 import numpy as np
 import scanpy as sc
 from .qc_plots import plotReplicateScatter, plotCountDistribution
-from .pheno_plots import volcano_plot, label_by_color
-from .pheno_plots import DrugScreenPlotter
+from .pheno_plots import volcano_plot, label_by_color, label_resistance_hit, label_sensitivity_hit