Skip to content

Commit

Permalink
Bump v0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
FloBay committed Oct 2, 2023
1 parent 133ef22 commit 0e2c3b2
Show file tree
Hide file tree
Showing 15 changed files with 1,178 additions and 1,116 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
v0.2.0
- Support multiple controls to calculate more accurate ratios
- Improved stability of dashboards

v0.1.1
- Switch to pip installation
- Minor improvements and fixes
Expand Down
30 changes: 17 additions & 13 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion curve_curator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
# Florian P. Bayer - 2023
#

__version__ = '0.1.1'
__version__ = '0.2.0'
19 changes: 11 additions & 8 deletions curve_curator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
from . import quality_control
from .__init__ import __version__


def main():
if __name__ == '__main__':
# Build a command line parser for parsing multiple config files
command_line = argparse.ArgumentParser(
description='CurveCurator',
Expand Down Expand Up @@ -52,7 +51,7 @@ def main():
dest="path",
metavar="<PATH>",
type=str,
help="Relative path to the config.toml or batch.txt file to run the pipeline.")
help="Relative path to the config.toml file to run the pipeline.")

# Parse the terminal arguments
args = command_line.parse_args()
Expand All @@ -77,9 +76,17 @@ def main():
ui.setup_logger(Path(tf).parent, name=i)
ui.message(f' * Executing CurveCurator pipeline version {__version__}.')

# Load config
# Make a counter in batch mode only for the terminal
if args.batch:
ui.message(f' * Processing {i+1} of {len(toml_files)} data sets.', terminal_only=True)

# Check the input file is a toml file
if not ui.is_toml_file(tf):
ui.error(f' * The given file is not a TOML parameter file !\n * If it\'s a batch file make sure you activate the batch mode with --batch.')
ui.doneline()
continue

# Load config
config = ui.load_toml(tf, random_mode=bool(args.random))
config = ui.set_default_values(config)

Expand Down Expand Up @@ -112,7 +119,3 @@ def main():

# Done
ui.doneline()


if __name__ == '__main__':
main()
15 changes: 8 additions & 7 deletions curve_curator/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,11 +678,11 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s
fig1.add_tools(dots_hover_tool)

# Add thresholds and potency line. Visibility depends on the used approach
volcano_threshold_line_v0 = fig1.line(x='x', y='y', line_width=2, source=threshold_v0, color='red', line_dash='dashed')
volcano_threshold_line_v0 = fig1.line(x='x', y='y', line_width=1.5, source=threshold_v0, color='crimson', line_dash='solid')
volcano_threshold_line_v0.visible = True
volcano_threshold_line_v1 = fig1.line(x='x', y='y', line_width=2, source=threshold_v1, color='red', line_dash='dashed')
volcano_threshold_line_v1 = fig1.line(x='x', y='y', line_width=1.5, source=threshold_v1, color='crimson', line_dash='solid')
volcano_threshold_line_v1.visible = False
potency_threshold_line_p = fig1.line(x='x', y='y', line_width=2, source=threshold_p, color='red', line_dash='dashed')
potency_threshold_line_p = fig1.line(x='x', y='y', line_width=1.5, source=threshold_p, color='crimson', line_dash='solid')
potency_threshold_line_p.visible = volcano_params['method'] != 'sam'

# Add color bar
Expand Down Expand Up @@ -716,7 +716,7 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s
names=len(drug_doses) * ['example']))

# Plot the Curve plot with fit line and scatter points
fit_line = fig2.multi_line(xs='xs', ys='ys', color="red", line_width=5, alpha=0.6, source=curve_fit_source)
fit_line = fig2.multi_line(xs='xs', ys='ys', color="crimson", line_width=5, alpha=0.6, source=curve_fit_source)
curve_dots = fig2.circle(x='x', y='y', fill_color='black', fill_alpha=1, source=curve_dots_source, size=7, line_color='black')

# Add hover tooltips labels to figure 2 for fitted lines and curve dots
Expand Down Expand Up @@ -752,7 +752,7 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s

# Plot the data distribution
hist_boxes_3 = fig3.quad(top=edges[:-1], bottom=edges[1:], left=0, right=hist, fill_color="gray", line_color="white", alpha=1)
quality_lines = fig3.multi_line(xs='xs', ys='ys', color="red", line_width=3, alpha=1, source=quality_source)
quality_lines = fig3.multi_line(xs='xs', ys='ys', color="crimson", line_width=2.5, alpha=1, source=quality_source)
threshold1_line = fig3.line(x='x', y='y', color="black", line_width=3, alpha=1, source=signal_threshold1_source, line_dash='dashed')
threshold2_line = fig3.line(x='x', y='y', color="black", line_width=3, alpha=1, source=signal_threshold2_source, line_dash='dashed')

Expand Down Expand Up @@ -792,7 +792,7 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s

# Plot the data distribution
hist_boxes_4 = fig4.quad(top=edges[:-1], bottom=edges[1:], left=0, right=hist, fill_color="gray", line_color="white", alpha=1)
identification_lines = fig4.multi_line(xs='xs', ys='ys', color="red", line_width=3, alpha=1, source=identification_source)
identification_lines = fig4.multi_line(xs='xs', ys='ys', color="crimson", line_width=2.5, alpha=1, source=identification_source)
threshold_line1 = fig4.line(x='x', y='y', color="black", line_width=3, alpha=1, source=score_threshold1_source, line_dash='dashed')
threshold_line2 = fig4.line(x='x', y='y', color="black", line_width=3, alpha=1, source=score_threshold2_source, line_dash='dashed')

Expand Down Expand Up @@ -826,6 +826,7 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s
potency_bins = int(abs(potency_range[1] - potency_range[0]) // 0.1)
if len(potency_array) > 0:
hist, edges = np.histogram(potency_array, density=True, bins=np.linspace(potency_range[0], potency_range[1], potency_bins))
hist = hist / max(hist)
else:
# If not a single significant curve is present, fall back to dummy values so that a nice plot is still drawn with an empty background
hist, edges = [0, 1], [-2, -1, 0]
Expand All @@ -835,7 +836,7 @@ def dashboard(df, title, out_path, drug_doses, drug_unit, cols_ratio, model, f_s

# Plot the data distribution
hist_boxes_5 = fig5.quad(top=edges[:-1], bottom=edges[1:], left=0, right=hist, fill_color="gray", line_color="white", alpha=1)
potency_lines = fig5.multi_line(xs='xs', ys='ys', color="red", line_width=3, alpha=1, source=potency_source)
potency_lines = fig5.multi_line(xs='xs', ys='ys', color="crimson", line_width=2.5, alpha=1, source=potency_source)
threshold_line1 = fig5.line(x='x', y='y', color="black", line_width=3, alpha=1, source=potency_threshold1_source, line_dash='dashed')
threshold_line2 = fig5.line(x='x', y='y', color="black", line_width=3, alpha=1, source=potency_threshold2_source, line_dash='dashed')

Expand Down
35 changes: 21 additions & 14 deletions curve_curator/quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def get_imputation_value(df, col, pct=0.005):
----------
df : pd.DataFrame
A data frame containing at least col name.
col : str
column name in the df with values from which a good imputation value is drawn.
col : array-like of str
An array-like object of column name(s) in the df with values from which a good imputation value is drawn.
pct : float, optional
Percentile threshold which is used to find a good value for imputation. By default 0.005.
Expand All @@ -59,7 +59,7 @@ def get_imputation_value(df, col, pct=0.005):
value : float
imputation value.
"""
value = df[col].replace(0, np.nan).dropna().quantile(pct)
value = df[col].mean(axis=1).replace(0, np.nan).dropna().quantile(pct)
return value


Expand Down Expand Up @@ -132,10 +132,11 @@ def normalize_values(df, raw_cols, norm_cols, ref_col=None):
return df, normalization_factors


def add_ratios(df, cols, ratio_cols, ref_col):
def add_ratios(df, cols, ratio_cols, ref_cols):
"""
Calculate ratios of cols / ref_col.
The ratio values will be added to the df under the name of ratio_cols.
In case of multiple columns the mean of ref_cols is used to calculate ratios.
Parameters
----------
Expand All @@ -145,15 +146,15 @@ def add_ratios(df, cols, ratio_cols, ref_col):
A array-like object containing the column labels of the data.
ratio_cols : array-like
A array-like object containing the column labels of the future ratio data.
ref_col : string
A string indicating a column used as a reference for ratio calculations.
ref_cols : array-like
An array-like object of strings indicating one or multiple column(s) used as a reference for ratio calculations.
Returns
-------
df : pd.DataFrame
The result data frame with the added ratio_cols.
"""
df[ratio_cols] = df[cols].div(df[ref_col], axis=0).replace([np.inf], np.nan)
df[ratio_cols] = df[cols].div(df[ref_cols].mean(axis=1), axis=0).replace([np.inf], np.nan)
return df


Expand Down Expand Up @@ -213,7 +214,7 @@ def fit_model(y_data, x_data, M0, M1, fit_params, f_statistic_params):
M0 : MeanModel object
An MeanModel instance from curve_curator.models.
M1 : LogisticModel object
An LogisitcModel instance from curve_curator.models.
An LogisticModel instance from curve_curator.models.
fit_params : dict
parameter dictionary which adjust the specific fitting procedures. Must contain at least the fit speed and fit type.
f_statistic_params : dict
Expand Down Expand Up @@ -389,17 +390,19 @@ def run_pipeline(df, config, decoy_mode=False):
"""
# Load parameters from toml file
experiments = np.array(config['Experiment']['experiments'])
control_experiments = np.array(config['Experiment']['control_experiment'])
drug_concs = np.array(config['Experiment']['doses'])
drug_scale = config['Experiment']['dose_scale']
control_mask = (drug_concs != 0)
drug_scale = float(config['Experiment']['dose_scale'])
control_mask = (drug_concs != 0.0)
drug_log_concs = tool.build_drug_log_concentrations(drug_concs[control_mask], drug_scale)

# build the new column names based on experiment numbers
cols_raw = tool.build_col_names('Raw {}', experiments)
col_raw_control = tool.build_col_names('Raw {}', control_experiments) #f"Raw {config['Experiment']['control_experiment']}"
cols_normal = tool.build_col_names('Normalized {}', experiments)
col_normal_control = tool.build_col_names('Normalized {}', control_experiments) #f"Normalized {config['Experiment']['control_experiment']}"
cols_ratio = tool.build_col_names('Ratio {}', experiments)
col_raw_control = f"Raw {config['Experiment']['control_experiment']}"
col_normal_control = f"Normalized {config['Experiment']['control_experiment']}"
col_ratio_control = tool.build_col_names('Ratio {}', control_experiments)

# Setup the curve fit with default values unless specified in the toml file
proc_params = config['Processing']
Expand Down Expand Up @@ -434,8 +437,12 @@ def run_pipeline(df, config, decoy_mode=False):
else:
df = add_ratios(df, cols_raw, cols_ratio, col_raw_control)

# Signal Quality is the raw intensity in the control
df['Signal Quality'] = np.log2(df[col_raw_control])
# If multiple controls are provided, estimate the noise level in the controls alone
if len(col_raw_control) > 1:
df['Control Ratio Std'] = df[col_ratio_control].std(axis=1)

# Absolute signal quality is the raw intensity of the control(s)
df['Signal Quality'] = np.log2(df[col_raw_control].mean(axis=1))

# Sort concentrations and observations from low to high dose
sorted_doses = np.argsort(drug_log_concs)
Expand Down
28 changes: 20 additions & 8 deletions curve_curator/user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,21 @@ def error(msg, end='\n\n\n\n'):
"""
prints an error message to the terminal and logging file
"""
error_line = "\n" + 27 * '#' + ' ERROR ' + 27 * '#' + "\n\n"
msg = f'{TerminalFormatting.WARNING}{error_line}{msg}{TerminalFormatting.ENDC}'
error_line = "\n" + 32 * '#' + ' ERROR ' + 31 * '#' + "\n\n"
msg = f'{TerminalFormatting.FAIL}{error_line}{msg}{TerminalFormatting.ENDC}'
if LOGGER:
LOGGER.error(msg)
else:
print(msg, end=end)


def is_toml_file(file_path):
    """
    Checks if the file_path leads to a toml file.

    Parameters
    ----------
    file_path : str
        Path to the file that should be checked.

    Returns
    -------
    bool
        True if the file extension is exactly '.toml' (case-insensitive), else False.
    """
    # splitext already isolates the extension, so compare it for equality;
    # the previous endswith('.toml') check wrongly accepted e.g. 'file.xtoml'.
    return os.path.splitext(file_path)[-1].lower() == '.toml'


def check_path(path, is_dir=False):
"""
check_path(path, is_dir=False)
Expand Down Expand Up @@ -197,9 +204,9 @@ def check_toml_params(config):
#
# ['Experiment']
#
experiments = config['Experiment']['experiments']
control_experiment = config['Experiment']['control_experiment']
doses = config['Experiment']['doses']
experiments = np.array(config['Experiment']['experiments'])
control_experiment = np.array([config['Experiment']['control_experiment']]).flatten()
doses = np.array(config['Experiment']['doses'])
dose_scale = config['Experiment']['dose_scale']
dose_unit = config['Experiment']['dose_unit']

Expand All @@ -209,8 +216,8 @@ def check_toml_params(config):
if len(experiments) != len(doses):
error("Error: [Experiment] 'experiments' and [Experiment] 'doses' do no correspond in length.")
raise ValueError("[Experiment] 'experiments' & 'doses' length")
if control_experiment not in experiments:
error("Error: [Experiment] 'control_experiment' is not in [Experiment] 'experiments'.")
if len(set(control_experiment) - set(experiments)) > 0:
error("Error: [Experiment] at least one 'control_experiment' is not in [Experiment] 'experiments'.")
raise ValueError("[Experiment] 'experiments'")
if not dose_scale:
error("Error: [Experiment] 'dose_scale' is empty.")
Expand Down Expand Up @@ -303,10 +310,15 @@ def set_default_values(config):
"""
Sets default values for optional parameters of the pipeline when the user didn't specify it.
"""
experiments = config['Experiment']['experiments']
experiments = np.array(config['Experiment']['experiments']).flatten()
control_experiments = np.array([config['Experiment']['control_experiment']]).flatten()
doses = np.array([config['Experiment']['doses']]).flatten()

# Experiment
exp_params = config['Experiment']
exp_params['experiments'] = experiments
exp_params['control_experiment'] = control_experiments
exp_params['doses'] = doses
exp_params['dose_scale'] = float(exp_params.get('dose_scale', 1e0))
config['Experiment'] = exp_params

Expand Down
23 changes: 12 additions & 11 deletions example_datasets/decryptM_Dasatinib/curveCurator.log
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
2023-08-04 14:33:03,588 - INFO - * Executing CurveCurator pipeline version 0.0.6.
2023-08-04 14:33:03,588 - INFO - * Reading parameter file of experiment.
2023-08-04 14:33:03,603 - INFO - * Loading data file ./evidence.txt.
2023-08-04 14:33:06,277 - INFO - * The following normalization factors were applied:
2023-08-04 14:33:06,284 - INFO - {'Raw 1': 0.58, 'Raw 2': 0.13, 'Raw 3': 0.2, 'Raw 4': -0.61, 'Raw 5': -0.31, 'Raw 6': 0.19, 'Raw 7': 0.24, 'Raw 8': -0.19, 'Raw 9': -0.08, 'Raw 10': -0.15}
2023-08-04 14:33:06,294 - INFO - * Fitting curves parameters by standard OLS with 5 cores:
2023-08-04 14:35:58,962 - INFO - * Fitting curves parameters done !
2023-08-04 14:35:59,052 - INFO - * Calculate Relevance Score and apply SAM user thresholds:
2023-08-04 14:35:59,062 - INFO - alpha=0.05, fc_lim=0.45, s0=0.2141
2023-08-04 14:36:01,232 - INFO - * Rendering interactive dashboard using webgl backend ...
2023-08-04 14:36:02,832 - INFO - * Dashboard successfully rendered.
2023-10-01 16:56:46,687 - INFO - * Executing CurveCurator pipeline version 0.2.0.
2023-10-01 16:56:46,687 - INFO - * Reading parameter file of experiment.
2023-10-01 16:56:46,734 - INFO - * Loading data file Z:\internal_projects\active\TOPAS\Publications\DecryptM_Finder\GitHubUpload\curve_curator\example_datasets\decryptM_Dasatinib\./evidence.txt.
2023-10-01 16:56:47,864 - INFO - * 67 Curves were removed because of >4 missing values.
2023-10-01 16:56:49,306 - INFO - * The following normalization factors were applied:
2023-10-01 16:56:49,306 - INFO - {'Raw 1': 0.58, 'Raw 2': 0.13, 'Raw 3': 0.2, 'Raw 4': -0.61, 'Raw 5': -0.31, 'Raw 6': 0.19, 'Raw 7': 0.24, 'Raw 8': -0.19, 'Raw 9': -0.08, 'Raw 10': -0.15}
2023-10-01 16:56:49,339 - INFO - * Fitting curves parameters by standard OLS with 5 cores:
2023-10-01 16:58:46,797 - INFO - * Fitting curves parameters done !
2023-10-01 16:58:46,912 - INFO - * Calculate Relevance Score and apply SAM user thresholds:
2023-10-01 16:58:46,912 - INFO - alpha=0.05, fc_lim=0.45, s0=0.2141
2023-10-01 16:58:48,859 - INFO - * Rendering interactive dashboard using webgl backend ...
2023-10-01 16:58:50,452 - INFO - * Dashboard successfully rendered.
Loading

0 comments on commit 0e2c3b2

Please sign in to comment.