Skip to content

Commit

Permalink
Merge pull request #95 from ArcInstitute/dev
Browse files Browse the repository at this point in the history
minor fixes
  • Loading branch information
abearab authored Sep 22, 2024
2 parents 5a54c5d + 7857245 commit 7777e1b
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 32 deletions.
4 changes: 3 additions & 1 deletion docs/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ dependencies:
- sphinx=5.3.0
- sphinx_rtd_theme=1.1.1
- sphinxcontrib-bibtex
- polars>0.20
- pip
- pip:
- polars
- pyarrow
- biobear
- numba
- pydeseq2
- simple_colors
- adjustText
- watermark
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ dependencies:
- ipykernel
- mscorefonts
- rust>=1.72
- polars>0.20
- pip
- pip:
- polars
- pyarrow
- biobear
- numba
- pydeseq2
- simple_colors
- adjustText
- watermark
2 changes: 1 addition & 1 deletion screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
from .dashboard import DrugScreenDashboard


__version__ = "0.4.13"
__version__ = "0.4.14"
__author__ = "Abe Arab"
__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"
50 changes: 26 additions & 24 deletions screenpro/phenoscore/_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
}


def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='negative_control'):
def getCombinedScore(df_in, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
"""
Calculate the combined score column based on the given phenotypic scores and p-values.
Combined score is calculated as:
$combined\_score = \frac{score}{pseudo\_sd} \times -\log_{10}(pvalue)$
Parameters:
df (pandas.DataFrame): The input DataFrame.
df_in (pandas.DataFrame): The input DataFrame.
score_col (str): The column name for the individual scores. Default is 'score'.
pvalue_col (str): The column name for the p-values. Default is 'pvalue'.
target_col (str): The column name for the target variable. Default is 'target'.
Expand All @@ -39,18 +39,22 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg
Returns:
pandas.Series: The calculated combined score column.
"""
if 'target' not in df.columns:
raise ValueError('Column "target" not found in the input DataFrame.')
# make a copy of input dataframe
df = df_in.copy()

for col in [score_col, pvalue_col, target_col]:
if col not in df.columns:
raise ValueError(f'Column "{col}" not found in the input DataFrame.')

# calculate pseudo_sd
pseudo_sd = df[df['target'].eq(ctrl_label)][score_col].tolist()
pseudo_sd = df[df[target_col].eq(ctrl_label)][score_col].tolist()
pseudo_sd = np.std(pseudo_sd)

# calculate combined score
return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col])


def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'):
def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
"""
Annotate the given score table
Expand All @@ -60,49 +64,47 @@ def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalu
up_hit (str): up hit label
down_hit (str): down hit label
threshold (int): threshold value
score_col (str): score column name
pvalue_col (str): pvalue column name
ctrl_label (str): control label value
score_col (str): score column name. Default is 'score'.
target_col (str): column name for the target variable. Default is 'target'.
pvalue_col (str): pvalue column name. Default is 'pvalue'.
ctrl_label (str): control label value. Default is 'negative_control'.
Returns:
pd.DataFrame: annotated score dataframe
"""
if score_col is None: score_col = 'score'
if pvalue_col is None: pvalue_col = 'pvalue'
# make a copy of input dataframe
df = df_in.copy()

sel = ['target',score_col, pvalue_col]

for col in sel:
if col not in df_in.columns:
for col in [score_col, pvalue_col, target_col]:
if col not in df.columns:
raise ValueError(f'Column "{col}" not found in the input DataFrame.')

# make a copy of input dataframe
df = df_in[sel].copy()
# # rename/reformat columns
# df.columns = ['target', 'score', 'pvalue']

df[score_col] = df[score_col].astype(float)
df[pvalue_col] = df[pvalue_col].astype(float)

# add combined score column
df['combined_score'] = getCombinedScore(df, score_col, pvalue_col, ctrl_label)
df['combined_score'] = getCombinedScore(
df,
score_col=score_col, pvalue_col=pvalue_col, target_col=target_col,
ctrl_label=ctrl_label)

# add label column
df['label'] = '.'

# annotate hits: up
df.loc[
(df[score_col] > 0) & (~df['target'].eq(ctrl_label)) &
(df[score_col] > 0) & (~df[target_col].eq(ctrl_label)) &
(df['combined_score'] >= threshold), 'label'
] = up_hit

# annotate hits: down
df.loc[
(df[score_col] < 0) & (~df['target'].eq(ctrl_label)) &
(df[score_col] < 0) & (~df[target_col].eq(ctrl_label)) &
(df['combined_score'] <= -threshold), 'label'
] = down_hit

# annotate control
df.loc[df['target'].eq(ctrl_label), 'label'] = ctrl_label
df.loc[df[target_col].eq(ctrl_label), 'label'] = ctrl_label

# annotate non-hit
df.loc[df['label'] == '.', 'label'] = 'target_non_hit'
Expand Down
8 changes: 4 additions & 4 deletions screenpro/phenoscore/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names

# combine results into a dataframe
result = pd.concat([
pd.Series(scores, name='score'),
pd.Series(p_values, name=f'{test} pvalue'),
pd.Series(adj_p_values, name='BH adj_pvalue'),
pd.Series(target_sizes, name='number_of_guide_elements'),
pd.Series(scores, name='score', dtype=float),
pd.Series(p_values, name=f'{test} pvalue', dtype=float),
pd.Series(adj_p_values, name='BH adj_pvalue', dtype=float),
pd.Series(target_sizes, name='number_of_guide_elements', dtype=int),
], axis=1)

# add targets information
Expand Down
19 changes: 18 additions & 1 deletion screenpro/plotting/_rank.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import matplotlib.pyplot as plt

from adjustText import adjust_text
from ._utils import yellow_blue


Expand Down Expand Up @@ -57,8 +59,23 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_
ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor)

if highlight_values['text'] is not False:
texts = []
for i, row in highlight_ranks.iterrows():
ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=txt_font_size, color=highlight_color, ha='right')
t = ax.text(
row['Rank'] + .01,
row[rank_col] + .001,
row[name_col],
fontsize=txt_font_size,
color=highlight_color,
ha='right'
)
texts.append(t)

adjust_text(
texts,
arrowprops=dict(arrowstyle='-', color=highlight_color, lw=0.5),
ax=ax
)

# Add labels and title
ax.set_xlabel(xlabel)
Expand Down

0 comments on commit 7777e1b

Please sign in to comment.