Skip to content

Commit

Permalink
Merge pull request #3 from AmandaBirmingham/per_gram_fix
Browse files Browse the repository at this point in the history
Per gram fix
  • Loading branch information
AmandaBirmingham authored Jan 15, 2024
2 parents 0f19e40 + de841d7 commit c7f64b9
Show file tree
Hide file tree
Showing 7 changed files with 1,001 additions and 1,280 deletions.
Binary file added absolute_quant_example.xlsx
Binary file not shown.
189 changes: 135 additions & 54 deletions pysyndna/src/calc_cell_counts.py

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions pysyndna/src/fit_syndna_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def _validate_required_columns_exist(

missing_cols = set(required_cols_list) - set(input_df.columns)
if len(missing_cols) > 0:
missing_cols = sorted(missing_cols)
raise ValueError(
f"{error_msg}: {missing_cols}")

Expand Down Expand Up @@ -208,8 +209,8 @@ def fit_linear_regression_models(

log_messages_list = []

- # id any samples that have an inadequate total number of reads aligned
- # to syndna (i.e. less than min_sample_counts). Don't drop yet.
+ # id any syndnas that have an inadequate total number of reads aligned
+ # to them across all samples (less than min_sample_counts). Don't drop yet.
# Gathering this now bc it is easier while syndna id is still in the index,
# but we want the full column set while doing the validation checks.
# Note: synDNA author also made passing mention of dropping samples with
Expand Down Expand Up @@ -403,7 +404,7 @@ def _calc_indiv_syndna_weights(
# by summing up the concentrations of each individual syndna
total_syndna_ng_per_ul = syndna_concs_df[SYNDNA_INDIV_NG_UL_KEY].sum()

- # add a column for the unitless fraction of the syndna pool made up of
+ # add a column for the fraction of the syndna pool made up of
# each individual syndna by dividing the syndna_ng_per_uL of each
# syndna by the total_syndna_ng_per_ul for the pool
syndna_concs_df[SYNDNA_FRACTION_OF_POOL_KEY] = (
Expand Down
4 changes: 2 additions & 2 deletions pysyndna/tests/data/modelling_output.tsv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# This file is based on
# https://github.com/lzaramela/SynDNA/blob/main/data/saliva_linear_models.tsv
# with the values of the a_intercept and b_intercept columns negated
- # (because the lzaramela code generates regression models that predict the
+ # (because the Zaramela code generates regression models that predict the
# *negative* log10 of the read weight while the code under test predicts just
- # log10 of the read weight.
+ # log10 of the read weight.)
# All other columns were deleted.
ID a_intercept b_slope
A1_pool1_Fwd -6.775395054 1.244876524
Expand Down
12 changes: 10 additions & 2 deletions pysyndna/tests/data/models.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"A":
"example1":
"slope": 1.24487652379132
"intercept": -6.77539505390338
"rvalue": 0.9865030975156575
"pvalue": 1.428443560659758e-07
"stderr": 0.07305408550335003
"intercept_stderr": 0.2361976278251443
"B":
"example2":
"slope": 1.24675913604407
"intercept": -7.155318973708384
"rvalue": 0.9863241797356326
"pvalue": 1.505381146809759e-07
"stderr": 0.07365795255302438
"intercept_stderr": 0.2563956755844754
# example4 is a copy of example2
"example4":
"slope": 1.24675913604407
"intercept": -7.155318973708384
"rvalue": 0.9863241797356326
Expand Down
1,574 changes: 662 additions & 912 deletions pysyndna/tests/test_calc_cell_counts.py

Large diffs are not rendered by default.

Loading

0 comments on commit c7f64b9

Please sign in to comment.