From e2f7cf4c25d6079fdd9fd19367f901cdae033cd1 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 00:46:02 -0700 Subject: [PATCH 01/23] update assays module organization --- screenpro/{assays.py => assays/__init__.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename screenpro/{assays.py => assays/__init__.py} (98%) diff --git a/screenpro/assays.py b/screenpro/assays/__init__.py similarity index 98% rename from screenpro/assays.py rename to screenpro/assays/__init__.py index 672f454..085d777 100644 --- a/screenpro/assays.py +++ b/screenpro/assays/__init__.py @@ -12,10 +12,10 @@ import anndata as ad import scanpy as sc -from .phenoscore import runDESeq, extractDESeqResults -from .phenoscore import runPhenoScore, runPhenoScoreForReplicate -from .preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth -from .phenoscore.annotate import annotateScoreTable, hit_dict +from ..phenoscore import runDESeq, extractDESeqResults +from ..phenoscore import runPhenoScore, runPhenoScoreForReplicate +from ..preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth +from ..phenoscore.annotate import annotateScoreTable, hit_dict import warnings from copy import copy From 8f70e26f566294914fffdfedb196f4d66add0d15 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 00:55:36 -0700 Subject: [PATCH 02/23] enable `runPhenoScoreForReplicate` without `growth_factor_table` --- screenpro/assays/__init__.py | 43 ++++++++++++++++---------------- screenpro/phenoscore/__init__.py | 20 +++++++++------ 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 085d777..2631273 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -208,28 +208,27 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' self._add_phenotype_results(f'tau:{tau_name}') self._add_phenotype_results(f'rho:{rho_name}') - if growth_factor_table: - # get replicate level phenotype scores - pdata_df = pd.concat([ - runPhenoScoreForReplicate( - self.adata, x_label = x_label, y_label = y_label, score = score_label, - transformation=self.fc_transformation, - growth_factor_table=growth_factor_table, - **kwargs - ).add_prefix(f'{score_label}_') - - for x_label, y_label, score_label in [ - ('T0', untreated, 'gamma'), - ('T0', treated, 'tau'), - (untreated, treated, 'rho') - ] - ],axis=1).T - # add .pdata - self.pdata = ad.AnnData( - X = pdata_df, - obs = growth_factor_table.loc[pdata_df.index,:], - var=self.adata.var - ) + # get replicate level phenotype scores + pdata_df = pd.concat([ + runPhenoScoreForReplicate( + self.adata, x_label = x_label, y_label = y_label, score = score_label, + transformation=self.fc_transformation, + growth_factor_table=growth_factor_table, + **kwargs + ).add_prefix(f'{score_label}_') + + for x_label, y_label, score_label in [ + ('T0', untreated, 'gamma'), + ('T0', treated, 'tau'), + (untreated, treated, 'rho') + ] + ],axis=1).T + # add .pdata + self.pdata = ad.AnnData( + X = pdata_df, + obs = growth_factor_table.loc[pdata_df.index,:], + var=self.adata.var + ) def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs): """ diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index e4ac013..7eab337 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -68,7 +68,7 @@ def _generatePseudoGeneAnnData(adata, num_pseudogenes='auto', pseudogene_size='a return out -def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, +def runPhenoScore(adata, cond_ref, cond_test, score_level, test, transformation='log2', growth_rate=1, n_reps='auto', keep_top_n = None,num_pseudogenes='auto', pseudogene_size='auto', count_layer=None, ctrl_label='negative_control'): """Calculate phenotype score and p-values when comparing `cond_test` vs `cond_ref`. @@ -77,9 +77,9 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, adata (AnnData): AnnData object cond_ref (str): condition reference cond_test (str): condition test - transformation (str): transformation to use for calculating score - test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test) score_level (str): score level + test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test) + transformation (str): transformation to use for calculating score growth_rate (int): growth rate n_reps (int): number of replicates keep_top_n (int): number of top guides to keep per target @@ -226,7 +226,7 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, return result_name, result -def runPhenoScoreForReplicate(adata, x_label, y_label, score, growth_factor_table, transformation, ctrl_label='negative_control'): +def runPhenoScoreForReplicate(adata, x_label, y_label, score, growth_factor_table=None, transformation='log2', ctrl_label='negative_control'): """Calculate phenotype score for each pair of replicates. Args: @@ -248,14 +248,20 @@ def runPhenoScoreForReplicate(adata, x_label, y_label, score, growth_factor_tabl results = {} for replicate in adat.obs.replicate.unique(): + + if growth_factor_table: + growth_rate = growth_factor_table.query( + f'score=="{score}" & replicate=={replicate}' + )['growth_factor'].values[0] + else: + growth_rate = 1 + res = calculatePhenotypeScore( x=adat[adat.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X, y=adat[adat.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X, - x_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X, y_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X, - - growth_rate=growth_factor_table.query(f'score=="{score}" & replicate=={replicate}')['growth_factor'].values[0], + growth_rate=growth_rate, transformation=transformation, level='row' # there is only one column so `row` option here is equivalent to the value before averaging. ) From 374238c4daa503699c049ab1366bf1c291dec100 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 01:19:33 -0700 Subject: [PATCH 03/23] mend --- screenpro/assays/__init__.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 2631273..7f1d866 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -44,14 +44,6 @@ def __init__(self, adata, fc_transformation='log2', test='ttest', n_reps=3, verb self.phenotype_names = [] self.verbose = verbose - # def __repr__(self): - # descriptions = '' - # for score_level in self.phenotypes.keys(): - # scores = "', '".join(self.phenotypes[score_level].columns.get_level_values(0).unique().to_list()) - # descriptions += f"Phenotypes in score_level = '{score_level}':\n scores: '{scores}'\n" - - # return f'obs->samples\nvar->elementss\n\n{self.__repr__()}\n\n{descriptions}' - def copy(self): return copy(self) @@ -189,7 +181,7 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) - # TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. + #TODO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. rho_name, rho = runPhenoScore( self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_treated_vs_untreated, n_reps=self.n_reps, @@ -209,12 +201,13 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' self._add_phenotype_results(f'rho:{rho_name}') # get replicate level phenotype scores + #TODO: move this to a separate function / method pdata_df = pd.concat([ runPhenoScoreForReplicate( self.adata, x_label = x_label, y_label = y_label, score = score_label, transformation=self.fc_transformation, growth_factor_table=growth_factor_table, - **kwargs + # **kwargs ).add_prefix(f'{score_label}_') for x_label, y_label, score_label in [ From b90e5393ff7c7ff1e33f35065bec5acef0e0ff34 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 01:52:22 -0700 Subject: [PATCH 04/23] update `pyDESeq2` method --- screenpro/assays/__init__.py | 59 ++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 7f1d866..ea6b100 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -109,35 +109,56 @@ def countNormalization(self, pseudo_count_value=0.5): if self.verbose: print('Counts normalized by sequencing depth.') - def calculateDrugScreenDESeq(self, t0, untreated, treated, run_name=None, **kwargs): + def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESeq2', **kwargs): """ Calculate DESeq2 results for a given drug screen dataset. Args: - design (str): design matrix for DESeq2 - run_name (str): name for the DESeq2 calculation run + design (str): design matrix for DESeq2-based analysis + untreated (str): name of the untreated condition + treated (str): name of the treated condition + t0 (str): name of the untreated condition + run_name (str): name for the phenotype calculation run **kwargs: additional arguments to pass to runDESeq """ - dds = runDESeq(self.adata, 'condition', **kwargs) - - # Calculate `gamma`, `rho`, and `tau` phenotype scores - gamma_name, gamma = extractDESeqResults( - dds, 'condition', t0, untreated, **kwargs - ) + if run_name in self.phenotypes.keys(): + raise ValueError(f"Phenotype calculation run '{run_name}' already exists in self.phenoypes!") - tau_name, tau = extractDESeqResults( - dds, 'condition', t0, treated, **kwargs - ) + self.phenotypes[run_name] = {} + + self.phenotypes[run_name]['config'] = { + 'phenoscore_method':'pyDESeq2', + 'untreated':untreated, + 'treated':treated, + 't0':t0, + 'n_reps':self.n_reps, + } + self.phenotypes[run_name]['results'] = {} + + if type(treated) != list: treated = [treated] + + # run pyDESeq2 analysis + dds = runDESeq(self.adata, 'condition', **kwargs) - rho_name, rho = extractDESeqResults( - dds, 'condition', untreated, treated, **kwargs - ) + # extract comparison results + if t0: + # Calculate `gamma`, `rho`, and `tau` phenotype scores + gamma_name, gamma = extractDESeqResults( + dds, 'condition', t0, untreated, **kwargs + ) + self.phenotypes[run_name]['results'][gamma_name] = gamma - if not run_name: run_name = 'pyDESeq2' + for tr in treated: + tau_name, tau = extractDESeqResults( + dds, 'condition', t0, treated, **kwargs + ) + self.phenotypes[run_name]['results'][tau_name] = tau - self.phenotypes[run_name] = pd.concat({ - f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho - }, axis=1) + for tr in treated: + rho_name, rho = extractDESeqResults( + dds, 'condition', untreated, tr, **kwargs + ) + self.phenotypes[run_name]['results'][rho_name] = rho def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='pop_doublings', run_name=None, **kwargs): """ From 0454223d7b8e61bf790cf4e9c56a810baae37d2b Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:30:40 -0700 Subject: [PATCH 05/23] update `calculateDrugScreen` method --- screenpro/assays/__init__.py | 202 ++++++++++++++++++++--------------- 1 file changed, 113 insertions(+), 89 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index ea6b100..2e1aeb4 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -41,16 +41,15 @@ def __init__(self, adata, fc_transformation='log2', test='ttest', n_reps=3, verb self.test = test self.n_reps = n_reps self.phenotypes = {} - self.phenotype_names = [] self.verbose = verbose def copy(self): return copy(self) - def _add_phenotype_results(self, phenotype_name): - if phenotype_name in self.phenotype_names: - raise ValueError(f"Phenotype '{phenotype_name}' already exists in self.phenotype_names!") - self.phenotype_names.append(phenotype_name) + def _add_phenotype_results(self, phenotype_name, phenotype_table): + if phenotype_name in self.phenotypes['results'].keys(): + raise ValueError(f"Phenotype '{phenotype_name}' already exists in self.phenotypes['results']!") + self.phenotypes['results'][phenotype_name] = phenotype_table def _calculateGrowthFactor(self, untreated, treated, db_rate_col): """ @@ -80,6 +79,25 @@ def _calculateGrowthFactor(self, untreated, treated, db_rate_col): return out + def _getTreatmentDoublingRate(self, untreated, treated, db_rate_col): + if 'pop_doublings' not in self.adata.obs.columns or db_rate_col == None: + warnings.warn('No doubling rate information provided.') + db_untreated = 1 + db_treated = 1 + db_diff = 1 + growth_factor_table = None + + else: + growth_factor_table = self._calculateGrowthFactor( + untreated = untreated, treated = treated, db_rate_col = db_rate_col + ) + + db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean() + db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean() + db_diff = np.abs(db_untreated - db_treated) + + return db_untreated, db_treated, db_diff + def filterLowCounts(self, filter_type='all', minimum_reads=50): """ Filter low counts in adata.X @@ -123,17 +141,17 @@ def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESe """ if run_name in self.phenotypes.keys(): raise ValueError(f"Phenotype calculation run '{run_name}' already exists in self.phenoypes!") - - self.phenotypes[run_name] = {} - - self.phenotypes[run_name]['config'] = { - 'phenoscore_method':'pyDESeq2', - 'untreated':untreated, - 'treated':treated, - 't0':t0, - 'n_reps':self.n_reps, - } - self.phenotypes[run_name]['results'] = {} + else: + self.phenotypes[run_name] = {} + + self.phenotypes[run_name]['config'] = { + 'method':'pyDESeq2', + 'untreated':untreated, + 'treated':treated, + 't0':t0, + 'n_reps':self.n_reps, + } + self.phenotypes[run_name]['results'] = {} if type(treated) != list: treated = [treated] @@ -141,108 +159,110 @@ def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESe dds = runDESeq(self.adata, 'condition', **kwargs) # extract comparison results - if t0: + if t0 != None and type(treated) == str: # Calculate `gamma`, `rho`, and `tau` phenotype scores gamma_name, gamma = extractDESeqResults( dds, 'condition', t0, untreated, **kwargs ) - self.phenotypes[run_name]['results'][gamma_name] = gamma + self._add_phenotype_results(f'gamma:{gamma_name}', gamma) for tr in treated: tau_name, tau = extractDESeqResults( dds, 'condition', t0, treated, **kwargs ) - self.phenotypes[run_name]['results'][tau_name] = tau + self._add_phenotype_results(f'tau:{tau_name}', tau) for tr in treated: rho_name, rho = extractDESeqResults( dds, 'condition', untreated, tr, **kwargs ) - self.phenotypes[run_name]['results'][rho_name] = rho + self._add_phenotype_results(f'rho:{rho_name}', rho) - def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='pop_doublings', run_name=None, **kwargs): + def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_col='pop_doublings', run_name=None, **kwargs): """ Calculate `gamma`, `rho`, and `tau` phenotype scores for a drug screen dataset in a given `score_level`. Args: - t0 (str): name of the untreated condition + score_level (str): name of the score level untreated (str): name of the untreated condition treated (str): name of the treated condition - score_level (str): name of the score level + t0 (str): name of the untreated condition db_rate_col (str): column name for the doubling rate, default is 'pop_doublings' run_name (str): name for the phenotype calculation run **kwargs: additional arguments to pass to runPhenoScore """ - if 'pop_doublings' not in self.adata.obs.columns or db_rate_col == None: - warnings.warn('No doubling rate information provided.') - db_untreated = 1 - db_treated = 1 - db_treated_vs_untreated = 1 - growth_factor_table = None - + if not run_name: run_name = score_level + if run_name in self.phenotypes.keys(): + raise ValueError(f"Phenotype calculation run '{run_name}' already exists in self.phenoypes!") else: - growth_factor_table = self._calculateGrowthFactor( - untreated = untreated, treated = treated, db_rate_col = db_rate_col - ) - - db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean() - db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean() - db_treated_vs_untreated = np.abs(db_untreated - db_treated) + self.phenotypes[run_name] = {} + self.phenotypes[run_name]['config'] = { + 'method':'ScreenPro2 - phenoscore', + 'untreated':untreated, + 'treated':treated, + 't0':t0, + 'n_reps':self.n_reps, + 'test':self.test, + 'score_level':score_level, + } + self.phenotypes[run_name]['results'] = {} - # calculate phenotype scores: gamma, tau, rho - gamma_name, gamma = runPhenoScore( - self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, - n_reps=self.n_reps, - transformation=self.fc_transformation, test=self.test, score_level=score_level, - **kwargs - ) - tau_name, tau = runPhenoScore( - self.adata, cond_ref=t0, cond_test=treated, growth_rate=db_treated, - n_reps=self.n_reps, - transformation=self.fc_transformation, test=self.test, score_level=score_level, - **kwargs - ) - #TODO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. - rho_name, rho = runPhenoScore( - self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_treated_vs_untreated, - n_reps=self.n_reps, - transformation=self.fc_transformation, test=self.test, score_level=score_level, - **kwargs - ) + if type(treated) != list: treated = [treated] - if not run_name: run_name = score_level - # save all results into a multi-index dataframe - self.phenotypes[run_name] = pd.concat({ - f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho - }, axis=1) + if t0 != None and type(treated) == str: + db_untreated,_,_ = self._getTreatmentDoublingRate(self, untreated, treated, db_rate_col) + # calculate phenotype scores: gamma, tau, rho + gamma_name, gamma = runPhenoScore( + self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, + n_reps=self.n_reps, + transformation=self.fc_transformation, test=self.test, score_level=score_level, + **kwargs + ) + self._add_phenotype_results(f'gamma:{gamma_name}', gamma) - # save phenotype name for reference - self._add_phenotype_results(f'gamma:{gamma_name}') - self._add_phenotype_results(f'tau:{tau_name}') - self._add_phenotype_results(f'rho:{rho_name}') - - # get replicate level phenotype scores - #TODO: move this to a separate function / method - pdata_df = pd.concat([ - runPhenoScoreForReplicate( - self.adata, x_label = x_label, y_label = y_label, score = score_label, - transformation=self.fc_transformation, - growth_factor_table=growth_factor_table, - # **kwargs - ).add_prefix(f'{score_label}_') - - for x_label, y_label, score_label in [ - ('T0', untreated, 'gamma'), - ('T0', treated, 'tau'), - (untreated, treated, 'rho') - ] - ],axis=1).T - # add .pdata - self.pdata = ad.AnnData( - X = pdata_df, - obs = growth_factor_table.loc[pdata_df.index,:], - var=self.adata.var - ) + for tr in treated: + _, db_tr, db_diff = self._getTreatmentDoublingRate(untreated, tr, db_rate_col) + + if t0 != None and type(treated) == str: + tau_name, tau = runPhenoScore( + self.adata, cond_ref=t0, cond_test=tr, growth_rate=db_tr, + n_reps=self.n_reps, + transformation=self.fc_transformation, test=self.test, score_level=score_level, + **kwargs + ) + self._add_phenotype_results(f'tau:{tau_name}', tau) + + #TODO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. + rho_name, rho = runPhenoScore( + self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_diff, + n_reps=self.n_reps, + transformation=self.fc_transformation, test=self.test, score_level=score_level, + **kwargs + ) + self._add_phenotype_results(f'rho:{rho_name}', rho) + + # # get replicate level phenotype scores + # #TODO: move this to a separate function / method + # pdata_df = pd.concat([ + # runPhenoScoreForReplicate( + # self.adata, x_label = x_label, y_label = y_label, score = score_label, + # transformation=self.fc_transformation, + # growth_factor_table=growth_factor_table, + # # **kwargs + # ).add_prefix(f'{score_label}_') + + # for x_label, y_label, score_label in [ + # ('T0', untreated, 'gamma'), + # ('T0', treated, 'tau'), + # (untreated, treated, 'rho') + # ] + # ],axis=1).T + # # add .pdata + # self.pdata = ad.AnnData( + # X = pdata_df, + # obs = growth_factor_table.loc[pdata_df.index,:], + # var=self.adata.var + # ) def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs): """ @@ -339,6 +359,10 @@ def getAnnotatedTable(self, threshold, run_name='auto', ctrl_label='negative_con keep_col = [target_col, score_col, pvalue_col] + # self.phenotypes[run_name] = pd.concat({ + # f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho + # }, axis=1) + score_names = {s for s, col in self.phenotypes[run_name].columns} sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list() From 30464057bdd503db70ca9a3d64bcb7c09e8acc76 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:30:49 -0700 Subject: [PATCH 06/23] update `calculateDrugScreen` method --- screenpro/assays/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 2e1aeb4..6705ae8 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -222,7 +222,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ for tr in treated: _, db_tr, db_diff = self._getTreatmentDoublingRate(untreated, tr, db_rate_col) - + if t0 != None and type(treated) == str: tau_name, tau = runPhenoScore( self.adata, cond_ref=t0, cond_test=tr, growth_rate=db_tr, @@ -241,8 +241,8 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ ) self._add_phenotype_results(f'rho:{rho_name}', rho) - # # get replicate level phenotype scores # #TODO: move this to a separate function / method + # # get replicate level phenotype scores # pdata_df = pd.concat([ # runPhenoScoreForReplicate( # self.adata, x_label = x_label, y_label = y_label, score = score_label, From 8480d8968c5377878035508a850c3a7c63f48894 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:33:28 -0700 Subject: [PATCH 07/23] mend --- screenpro/assays/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 6705ae8..513f3d4 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -210,7 +210,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ if type(treated) != list: treated = [treated] if t0 != None and type(treated) == str: - db_untreated,_,_ = self._getTreatmentDoublingRate(self, untreated, treated, db_rate_col) + db_untreated,_,_ = self._getTreatmentDoublingRate(self, untreated, treated[0], db_rate_col) # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, From 0190dfad71183588ec0ecb50d360bec2c6b24af7 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:40:23 -0700 Subject: [PATCH 08/23] mend --- screenpro/assays/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 513f3d4..7bcba20 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -159,7 +159,7 @@ def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESe dds = runDESeq(self.adata, 'condition', **kwargs) # extract comparison results - if t0 != None and type(treated) == str: + if t0 != None and type(t0) == str: # Calculate `gamma`, `rho`, and `tau` phenotype scores gamma_name, gamma = extractDESeqResults( dds, 'condition', t0, untreated, **kwargs @@ -209,7 +209,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ if type(treated) != list: treated = [treated] - if t0 != None and type(treated) == str: + if t0 != None and type(t0) == str: db_untreated,_,_ = self._getTreatmentDoublingRate(self, untreated, treated[0], db_rate_col) # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( @@ -223,7 +223,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ for tr in treated: _, db_tr, db_diff = self._getTreatmentDoublingRate(untreated, tr, db_rate_col) - if t0 != None and type(treated) == str: + if t0 != None and type(t0) == str: tau_name, tau = runPhenoScore( self.adata, cond_ref=t0, cond_test=tr, growth_rate=db_tr, n_reps=self.n_reps, @@ -234,7 +234,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ #TODO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. rho_name, rho = runPhenoScore( - self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_diff, + self.adata, cond_ref=untreated, cond_test=tr, growth_rate=db_diff, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs From fba78132fde109b8655c0e7b33e4187142910d3f Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:41:44 -0700 Subject: [PATCH 09/23] mend --- screenpro/assays/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 7bcba20..7bd5611 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -210,7 +210,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ if type(treated) != list: treated = [treated] if t0 != None and type(t0) == str: - db_untreated,_,_ = self._getTreatmentDoublingRate(self, untreated, treated[0], db_rate_col) + db_untreated,_,_ = self._getTreatmentDoublingRate(untreated, treated[0], db_rate_col) # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, From 19fd2ce797c9e49a8ba9a18ee4016640a5692c63 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 26 Jul 2024 02:46:41 -0700 Subject: [PATCH 10/23] mend --- screenpro/assays/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 7bd5611..a15091b 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -46,10 +46,10 @@ def __init__(self, adata, fc_transformation='log2', test='ttest', n_reps=3, verb def copy(self): return copy(self) - def _add_phenotype_results(self, phenotype_name, phenotype_table): - if phenotype_name in self.phenotypes['results'].keys(): + def _add_phenotype_results(self, run_name, phenotype_name, phenotype_table): + if phenotype_name in self.phenotypes[run_name]['results'].keys(): raise ValueError(f"Phenotype '{phenotype_name}' already exists in self.phenotypes['results']!") - self.phenotypes['results'][phenotype_name] = phenotype_table + self.phenotypes[run_name]['results'][phenotype_name] = phenotype_table def _calculateGrowthFactor(self, untreated, treated, db_rate_col): """ @@ -164,19 +164,19 @@ def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESe gamma_name, gamma = extractDESeqResults( dds, 'condition', t0, untreated, **kwargs ) - self._add_phenotype_results(f'gamma:{gamma_name}', gamma) + self._add_phenotype_results(run_name, f'gamma:{gamma_name}', gamma) for tr in treated: tau_name, tau = extractDESeqResults( dds, 'condition', t0, treated, **kwargs ) - self._add_phenotype_results(f'tau:{tau_name}', tau) + self._add_phenotype_results(run_name, f'tau:{tau_name}', tau) for tr in treated: rho_name, rho = extractDESeqResults( dds, 'condition', untreated, tr, **kwargs ) - self._add_phenotype_results(f'rho:{rho_name}', rho) + self._add_phenotype_results(run_name, f'rho:{rho_name}', rho) def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_col='pop_doublings', run_name=None, **kwargs): """ @@ -218,7 +218,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) - self._add_phenotype_results(f'gamma:{gamma_name}', gamma) + self._add_phenotype_results(run_name, f'gamma:{gamma_name}', gamma) for tr in treated: _, db_tr, db_diff = self._getTreatmentDoublingRate(untreated, tr, db_rate_col) @@ -230,7 +230,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) - self._add_phenotype_results(f'tau:{tau_name}', tau) + self._add_phenotype_results(run_name, f'tau:{tau_name}', tau) #TODO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. rho_name, rho = runPhenoScore( @@ -239,7 +239,7 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) - self._add_phenotype_results(f'rho:{rho_name}', rho) + self._add_phenotype_results(run_name, f'rho:{rho_name}', rho) # #TODO: move this to a separate function / method # # get replicate level phenotype scores From 14dc166771196b7a2cb96c4289a5bd57fa2f83c4 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:26:37 -0700 Subject: [PATCH 11/23] apply log10 transformation to counts to fix #82 and improve logics in stats analysis --- screenpro/assays/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index a15091b..a12570d 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -126,7 +126,12 @@ def countNormalization(self, pseudo_count_value=0.5): normalizeSeqDepth(self.adata) if self.verbose: print('Counts normalized by sequencing depth.') - + + # log scale the counts + self.adata.X = np.log10(self.adata.X) + if self.verbose: print('`log10` transformation applied to counts.') + + def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESeq2', **kwargs): """ Calculate DESeq2 results for a given drug screen dataset. From 697d2136cbcfd39cb2e91b1fec63b5199c18f42e Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:33:54 -0700 Subject: [PATCH 12/23] remove version limits for `biobear` related to https://github.com/wheretrue/biobear/pull/162 --- environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index bd9689a..5cefad6 100644 --- a/environment.yml +++ b/environment.yml @@ -21,8 +21,8 @@ dependencies: - pip - pip: - polars - - pyarrow<17.0 - - biobear>=0.20.3,<0.21 + - pyarrow + - biobear - numba - pydeseq2 - simple_colors From 242c4a7c80d560a6a3622cde8ffcb3d79ff82bdd Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:34:21 -0700 Subject: [PATCH 13/23] mend --- screenpro/assays/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index a12570d..e959a09 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -131,7 +131,6 @@ def countNormalization(self, pseudo_count_value=0.5): self.adata.X = np.log10(self.adata.X) if self.verbose: print('`log10` transformation applied to counts.') - def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESeq2', **kwargs): """ Calculate DESeq2 results for a given drug screen dataset. From 6da7b6f80a4c950501676d2cbc2186f166b27f8f Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:36:28 -0700 Subject: [PATCH 14/23] bump version 0.4.7 --- screenpro/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 6d155ab..5715699 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -31,6 +31,6 @@ from .dashboard import DrugScreenDashboard -__version__ = "0.4.6" +__version__ = "0.4.7" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" From 3174db691977c2fb89bc7c3975b469bf72774b41 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:41:53 -0700 Subject: [PATCH 15/23] mend --- screenpro/assays/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index e959a09..c7519ef 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -113,7 +113,13 @@ def filterLowCounts(self, filter_type='all', minimum_reads=50): def countNormalization(self, pseudo_count_value=0.5): """ - Normalize the counts data in adata.X + Preprocess and normalize the counts data in adata.X + + Steps: + 1. Add pseudocount to counts + 2. Normalize counts by sequencing depth + 3. Log10 transformation + """ self.adata.layers['raw_counts'] = self.adata.X.copy() @@ -129,6 +135,7 @@ def countNormalization(self, pseudo_count_value=0.5): # log scale the counts self.adata.X = np.log10(self.adata.X) + if self.verbose: print('`log10` transformation applied to counts.') def calculateDrugScreenDESeq(self, untreated, treated, t0=None, run_name='pyDESeq2', **kwargs): From 19a03e143fd41c9b3f08014d422e053dc46fada0 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:44:17 -0700 Subject: [PATCH 16/23] fix module names --- docs/source/visualize.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/source/visualize.md b/docs/source/visualize.md index c2febe3..8f8733b 100644 --- a/docs/source/visualize.md +++ b/docs/source/visualize.md @@ -1,12 +1,17 @@ -# Visualize module +# Visualization modules Set of python classes and functions to enable visualizing CRISPR screening datasets. ___ ```{eval-rst} -.. automodule:: screenpro.visualize +.. automodule:: screenpro.plotting :members: :undoc-members: :show-inheritance: -``` \ No newline at end of file + +.. automodule:: screenpro.dashboard + :members: + :undoc-members: + :show-inheritance: +``` From 0a90e21ffcd18807fa5735d743572a343263b4c8 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:46:26 -0700 Subject: [PATCH 17/23] add info --- docs/source/history.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/history.rst b/docs/source/history.rst index 030d827..cd9bebf 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -10,6 +10,7 @@ History ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * add command line interface, i.e. ``screenpro --help`` * rename ``Counter`` class to ``GuideCounter`` for code clarity +* key bug fixes and improvements in phenotype calculation, normalization, and statistics * major bug fixes and improvements in code formatting 0.2.11 - 0.3.5 (Apr 2024 - June 2024) From d0bf22eec423ee519d7b2615c779b962bc088551 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:47:12 -0700 Subject: [PATCH 18/23] mend --- docs/source/ngs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/ngs.md b/docs/source/ngs.md index df58ec4..9e9042e 100644 --- a/docs/source/ngs.md +++ b/docs/source/ngs.md @@ -17,14 +17,14 @@ used for the screen, the functions are divided into two classes: ### Cas9 CRISPR-Cas system (single or dual sgRNA libraries) ```{eval-rst} -.. automodule:: screenpro.cas9 +.. automodule:: screenpro.ngs.cas9 :members: :show-inheritance: ``` ### Cas12 CRISPR-Cas system (multiplexed crRNA libraries) ```{eval-rst} -.. automodule:: screenpro.cas12 +.. automodule:: screenpro.ngs.cas12 :members: :show-inheritance: ``` From 4fee1e7941b81634e78f779997289d1c33c2db23 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 03:57:01 -0700 Subject: [PATCH 19/23] mend --- docs/source/visualize.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/visualize.md b/docs/source/visualize.md index 8f8733b..c5314da 100644 --- a/docs/source/visualize.md +++ b/docs/source/visualize.md @@ -5,7 +5,12 @@ Set of python classes and functions to enable visualizing CRISPR screening datas ___ ```{eval-rst} -.. automodule:: screenpro.plotting +.. automodule:: screenpro.plotting.qc_plots + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: screenpro.plotting.pheno_plots :members: :undoc-members: :show-inheritance: From 5f91d5bcb0ebdde4fae2acb235186a12c7035bb6 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 04:08:02 -0700 Subject: [PATCH 20/23] add `listPhenotypeScores` --- screenpro/assays/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index c7519ef..3a374ff 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -302,6 +302,25 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None # save phenotype name for reference self._add_phenotype_results(f'delta:{delta_name}') + def listPhenotypeScores(self, run_name='auto'): + """ + List available phenotype scores for a given run_name + + Args: + run_name (str): name of the phenotype calculation run to retrieve + """ + if run_name == 'auto': + if len(list(self.phenotypes.keys())) == 1: + run_name = list(self.phenotypes.keys())[0] + else: + raise ValueError( + 'Multiple phenotype calculation runs found.' + 'Please specify run_name. Available runs: ' + '' + ', '.join(self.phenotypes.keys()) + ) + + return list(self.phenotypes[run_name]['results'].keys) + def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label='negative_control', target_col='target',pvalue_col='ttest pvalue', score_col='score'): """ Get phenotype scores for a given score level From 4472263736fa6210f0723a321bf1699adcb6128d Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 04:12:04 -0700 Subject: [PATCH 21/23] mend --- screenpro/assays/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 3a374ff..9ab4f31 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -319,7 +319,9 @@ def listPhenotypeScores(self, run_name='auto'): '' + ', '.join(self.phenotypes.keys()) ) - return list(self.phenotypes[run_name]['results'].keys) + out = list(self.phenotypes[run_name]['results'].keys()) + + return out def getPhenotypeScores(self, score_name, threshold, run_name='auto', ctrl_label='negative_control', target_col='target',pvalue_col='ttest pvalue', score_col='score'): """ From 211f8f0d02a6d0706097dfa31a943291ede8f8a7 Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 05:08:56 -0700 Subject: [PATCH 22/23] update `runPhenoScoreForReplicate` --- screenpro/assays/__init__.py | 53 +++++++++++++++++--------------- screenpro/phenoscore/__init__.py | 19 +++++------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/screenpro/assays/__init__.py b/screenpro/assays/__init__.py index 9ab4f31..d925203 100644 --- a/screenpro/assays/__init__.py +++ b/screenpro/assays/__init__.py @@ -220,9 +220,9 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ if type(treated) != list: treated = [treated] + # calculate phenotype scores: gamma, tau, rho if t0 != None and type(t0) == str: db_untreated,_,_ = self._getTreatmentDoublingRate(untreated, treated[0], db_rate_col) - # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, n_reps=self.n_reps, @@ -252,28 +252,33 @@ def calculateDrugScreen(self, score_level, untreated, treated, t0=None, db_rate_ ) self._add_phenotype_results(run_name, f'rho:{rho_name}', rho) - # #TODO: move this to a separate function / method - # # get replicate level phenotype scores - # pdata_df = pd.concat([ - # runPhenoScoreForReplicate( - # self.adata, x_label = x_label, y_label = y_label, score = score_label, - # transformation=self.fc_transformation, - # growth_factor_table=growth_factor_table, - # # **kwargs - # ).add_prefix(f'{score_label}_') - - # for x_label, y_label, score_label in [ - # ('T0', untreated, 'gamma'), - # ('T0', treated, 'tau'), - # (untreated, treated, 'rho') - # ] - # ],axis=1).T - # # add .pdata - # self.pdata = ad.AnnData( - # X = pdata_df, - # obs = growth_factor_table.loc[pdata_df.index,:], - # var=self.adata.var - # ) + # gnerate replicate level phenotype scores + pdata_dict = {} + for score_name in self.phenotypes[score_level]['results'].keys(): + score_label, comparison = score_name.split(':') + y_label, x_label = comparison.split('_vs_') + + #TODO: get growth rates for replicate level scores + + + pdata_dict.update({ + score_name: runPhenoScoreForReplicate( + self.adata, x_label = x_label, y_label = y_label, + transformation=self.fc_transformation, + # growth_factor_reps= + # **kwargs + ).add_prefix(f'{score_label}_').T # transpose to match pdata format + }) + + pdata_df = pd.concat(pdata_dict, axis=0) + + #TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate` to maintain same format + # add .pdata + self.pdata = ad.AnnData( + X = pdata_df, + # obs = growth_factor_table.loc[pdata_df.index,:], + var=self.adata.var + ) def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs): """ @@ -395,7 +400,7 @@ def getAnnotatedTable(self, threshold, run_name='auto', ctrl_label='negative_con # f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho # }, axis=1) - score_names = {s for s, col in self.phenotypes[run_name].columns} + score_names = set(self.phenotypes[run_name]['results'].keys()) sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list() df_list = {} diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 7eab337..2f2455c 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -226,15 +226,14 @@ def runPhenoScore(adata, cond_ref, cond_test, score_level, test, transformation= return result_name, result -def runPhenoScoreForReplicate(adata, x_label, y_label, score, growth_factor_table=None, transformation='log2', ctrl_label='negative_control'): +def runPhenoScoreForReplicate(adata, x_label, y_label, growth_factor_reps=None, transformation='log2', ctrl_label='negative_control'): """Calculate phenotype score for each pair of replicates. Args: adata (AnnData): AnnData object x_label: name of the first condition in column `condition` of `screen.adata.obs` y_label: name of the second condition in column `condition` of `screen.adata.obs` - score: score to use for calculating phenotype score, i.e. 'gamma', 'tau', or 'rho' - growth_factor_table: dataframe of growth factors, i.e. output from `getGrowthFactors` function + growth_factor_reps (dict): dictionary of growth factors for each replicate transformation (str): transformation to use for calculating score ctrl_label: string to identify labels of negative control elements in sgRNA library (default is 'negative_control') @@ -247,21 +246,19 @@ def runPhenoScoreForReplicate(adata, x_label, y_label, score, growth_factor_tabl results = {} - for replicate in adat.obs.replicate.unique(): + if growth_factor_reps is not None: + growth_rate_reps = growth_factor_reps[replicate] + else: + growth_factor_reps = dict([(replicate, 1) for replicate in adat.obs.replicate.unique()]) - if growth_factor_table: - growth_rate = growth_factor_table.query( - f'score=="{score}" & replicate=={replicate}' - )['growth_factor'].values[0] - else: - growth_rate = 1 + for replicate in adat.obs.replicate.unique(): res = calculatePhenotypeScore( x=adat[adat.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X, y=adat[adat.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X, x_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X, y_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X, - growth_rate=growth_rate, + growth_rate=growth_rate_reps[replicate], transformation=transformation, level='row' # there is only one column so `row` option here is equivalent to the value before averaging. ) From 52480eac124fcca0a71a070bcd5bc03f4fefa70f Mon Sep 17 00:00:00 2001 From: abearab Date: Sun, 28 Jul 2024 05:12:11 -0700 Subject: [PATCH 23/23] mend --- screenpro/phenoscore/__init__.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 2f2455c..88f60f7 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -226,14 +226,14 @@ def runPhenoScore(adata, cond_ref, cond_test, score_level, test, transformation= return result_name, result -def runPhenoScoreForReplicate(adata, x_label, y_label, growth_factor_reps=None, transformation='log2', ctrl_label='negative_control'): +def runPhenoScoreForReplicate(adata, x_label, y_label, growth_rate_reps=None, transformation='log2', ctrl_label='negative_control'): """Calculate phenotype score for each pair of replicates. Args: adata (AnnData): AnnData object x_label: name of the first condition in column `condition` of `screen.adata.obs` y_label: name of the second condition in column `condition` of `screen.adata.obs` - growth_factor_reps (dict): dictionary of growth factors for each replicate + growth_rate_reps (dict): dictionary of growth rates for each replicate transformation (str): transformation to use for calculating score ctrl_label: string to identify labels of negative control elements in sgRNA library (default is 'negative_control') @@ -246,10 +246,8 @@ def runPhenoScoreForReplicate(adata, x_label, y_label, growth_factor_reps=None, results = {} - if growth_factor_reps is not None: - growth_rate_reps = growth_factor_reps[replicate] - else: - growth_factor_reps = dict([(replicate, 1) for replicate in adat.obs.replicate.unique()]) + if growth_rate_reps is None: + growth_rate_reps = dict([(replicate, 1) for replicate in adat.obs.replicate.unique()]) for replicate in adat.obs.replicate.unique():