Skip to content

Commit

Permalink
Added input casting to fit_syndna_models
Browse files: browse the repository at this point in the history
  • Loading branch information
AmandaBirmingham committed Feb 10, 2024
1 parent 952d561 commit 8513c6c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
14 changes: 13 additions & 1 deletion pysyndna/src/fit_syndna_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from typing import Optional, List, Dict, Union
from pysyndna.src.util import validate_required_columns_exist, \
validate_metadata_vs_reads_id_consistency, SAMPLE_ID_KEY
validate_metadata_vs_reads_id_consistency, cast_cols, SAMPLE_ID_KEY

DEFAULT_MIN_SAMPLE_COUNTS = 1

Expand Down Expand Up @@ -401,6 +401,18 @@ def fit_linear_regression_models(
validate_required_columns_exist(
sample_syndna_weights_and_total_reads_df, expected_info_cols,
"sample metadata is missing required column(s)")
expected_syndna_cols = [SYNDNA_ID_KEY, SYNDNA_INDIV_NG_UL_KEY]
validate_required_columns_exist(
syndna_concs_df, expected_syndna_cols,
"syndna concentrations are missing required column(s)")

# cast numeric input columns to the correct type
sample_syndna_weights_and_total_reads_df = cast_cols(
sample_syndna_weights_and_total_reads_df, [SYNDNA_POOL_MASS_NG_KEY])
sample_syndna_weights_and_total_reads_df = cast_cols(
sample_syndna_weights_and_total_reads_df,
[SAMPLE_TOTAL_READS_KEY], int)
syndna_concs_df = cast_cols(syndna_concs_df, [SYNDNA_INDIV_NG_UL_KEY])

# id any syndnas that have an inadequate total number of reads aligned
# to them across all samples (less than min_sample_counts). Don't drop yet.
Expand Down
67 changes: 67 additions & 0 deletions pysyndna/tests/test_fit_syndna_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,45 @@ def test_fit_linear_regression_models_for_qiita(self):

self.assertDictEqual(expected_out, output_dict)

def test_fit_linear_regression_models_for_qiita_w_casts(self):
    # Mirrors test_fit_linear_regression_models_for_qiita, except that
    # every prep-info value is converted to a string before being handed
    # to the function under test.
    stringified_prep_info = {
        col_name: list(map(str, col_vals))
        for col_name, col_vals in self.prep_info_dict.items()}
    syndna_reads_biom = biom.table.Table(
        self.reads_per_syndna_per_sample_array,
        self.reads_per_syndna_per_sample_dict[SYNDNA_ID_KEY],
        self.sample_ids)

    output_dict = fit_linear_regression_models_for_qiita(
        pd.DataFrame(stringified_prep_info), syndna_reads_biom, 50)

    # Text rendering of the linear regression results for the full data
    # (compare self.lingress_results and the "linear regressions" sheet
    # of "absolute_quant_example.xlsx").
    expected_regressions_text = (
        'A:\n'
        ' intercept: -6.724238188489\n'
        ' intercept_stderr: 0.236197627825\n'
        ' pvalue: 1.42844e-07\n'
        ' rvalue: 0.986503097515\n'
        ' slope: 1.244876523791\n'
        ' stderr: 0.073054085503\n'
        'B:\n'
        ' intercept: -7.155318973708\n'
        ' intercept_stderr: 0.256395675584\n'
        ' pvalue: 1.50538e-07\n'
        ' rvalue: 0.986324179735\n'
        ' slope: 1.246759136044\n'
        ' stderr: 0.073657952553\n')
    expected_out = {
        'lin_regress_by_sample_id': expected_regressions_text,
        'fit_syndna_models_log': ''
    }

    self.assertDictEqual(expected_out, output_dict)

def test_fit_linear_regression_models_for_qiita_w_alt_config(self):
prep_info_df = pd.DataFrame(self.prep_info_dict)
input_biom = biom.table.Table(
Expand Down Expand Up @@ -316,6 +355,34 @@ def test_fit_linear_regression_models(self):
self.lingress_results, out_linregress_dict)
self.assertEqual([], out_msgs)

def test_fit_linear_regression_models_w_casts(self):
    # Mirrors test_fit_linear_regression_models, except that every value
    # in the syndna-concentration and sample-metadata inputs is converted
    # to a string before being handed to the function under test.
    def _stringify_values(cols_dict):
        # Build a DataFrame whose cells are the str() of each input value.
        return pd.DataFrame({
            col_name: list(map(str, col_vals))
            for col_name, col_vals in cols_dict.items()})

    concs_as_text_df = _stringify_values(self.syndna_concs_dict)
    sample_info_as_text_df = _stringify_values(
        self.a_b_sample_syndna_weights_and_total_reads_dict)

    # The per-syndna read counts are left as-is; only their index
    # is set to the syndna id column.
    reads_df = pd.DataFrame(
        self.reads_per_syndna_per_sample_dict).set_index(SYNDNA_ID_KEY)

    out_linregress_dict, out_msgs = fit_linear_regression_models(
        concs_as_text_df, sample_info_as_text_df, reads_df, 50)

    self.assert_lingressresult_dict_almost_equal(
        self.lingress_results, out_linregress_dict)
    self.assertEqual([], out_msgs)

def test_fit_linear_regression_models_w_log_msgs(self):
min_count = 200

Expand Down

0 comments on commit 8513c6c

Please sign in to comment.