From 777e97526ffde25fee9ae6e598200af43acaa5df Mon Sep 17 00:00:00 2001 From: Yang Chen <60239063+yangchen2@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:07:15 -0700 Subject: [PATCH 1/5] filter out discriminatory taxa --- qadabra/utils.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/qadabra/utils.py b/qadabra/utils.py index ae7104d..73ec524 100644 --- a/qadabra/utils.py +++ b/qadabra/utils.py @@ -4,7 +4,7 @@ import biom import pandas as pd - +import warnings def _validate_input( logger: logging.Logger, @@ -51,11 +51,22 @@ def _validate_input( joint_df = tbl_df.join(md) gb = joint_df.groupby(factor_name).sum(numeric_only=True) feat_presence = gb.apply(lambda x: x.all()) - if not feat_presence.all(): - raise ValueError( - "Some taxa in the table perfectly discriminate factor groups. " - "Please filter out these taxa before running Qadabra." - ) + # if not feat_presence.all(): + # raise ValueError( + # "Some taxa in the table perfectly discriminate factor groups. " + # "Please filter out these taxa before running Qadabra." + # ) + discriminating_feats = feat_presence[~feat_presence].index.tolist() + + if len(discriminating_feats) > 0: + warning_msg = "Some features in the table perfectly discriminate factor groups:\n" + '\n'.join(discriminating_feats) + ".\nAutomatically filtering out these features before running Qadabra..." 
+ print("Number of discriminating features: " + str(len(discriminating_feats))) + warnings.warn(warning_msg, category=Warning) + + # Filtering out the discriminating features from the BIOM table + tbl = tbl.filter(lambda value, id_, metadata: id_ not in discriminating_feats, axis='observation', inplace=False) + logger.info(f"Table shape after filtering: {tbl.shape}") + if tree: from bp import parse_newick, to_skbio_treenode @@ -69,4 +80,4 @@ def _validate_input( raise ValueError("Tree tips are not a subset of table features!") else: logger.info("Reading phylogenetic tree...") - logger.info("(Optional tree file not provided. Skipping tree validation.)") + logger.info("(Optional tree file not provided. Skipping tree validation.)") \ No newline at end of file From 889f414f7bda9b4707388a4fec1e088c35c85c14 Mon Sep 17 00:00:00 2001 From: Yang Chen <60239063+yangchen2@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:08:34 -0700 Subject: [PATCH 2/5] remove previous code block --- qadabra/utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/qadabra/utils.py b/qadabra/utils.py index 73ec524..7c13096 100644 --- a/qadabra/utils.py +++ b/qadabra/utils.py @@ -51,11 +51,7 @@ def _validate_input( joint_df = tbl_df.join(md) gb = joint_df.groupby(factor_name).sum(numeric_only=True) feat_presence = gb.apply(lambda x: x.all()) - # if not feat_presence.all(): - # raise ValueError( - # "Some taxa in the table perfectly discriminate factor groups. " - # "Please filter out these taxa before running Qadabra." 
- # ) + discriminating_feats = feat_presence[~feat_presence].index.tolist() if len(discriminating_feats) > 0: From 671901343e1bb0a8939323725534e252ba14ca5c Mon Sep 17 00:00:00 2001 From: Yang Chen <60239063+yangchen2@users.noreply.github.com> Date: Fri, 19 Jan 2024 11:39:55 -0800 Subject: [PATCH 3/5] update README to say QADABRA is WIP software --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 456e061..c71c608 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ Importantly, Qadabra focuses on both FDR corrected p-values *and* [feature ranks ![Schematic](images/Qadabra_schematic.svg) +Please note this software is currently a work in progress. Your patience is appreciated as we continue to develop and enhance its features. Please leave an issue on GitHub should you run into any errors. + ## Installation ``` pip install qadabra @@ -97,7 +99,7 @@ This will create a zipped directory containing the report. Unzip this file and open the `report.html` file to view the report containing results and visualizations in your browser. ## Tutorial -See the [tutorial](tutorial.md) page for a walkthroughon using Qadabra workflow with a microbiome dataset. +See the [tutorial](tutorial.md) page for a walkthrough on using the Qadabra workflow with a microbiome dataset. ## FAQs Coming soon: An [FAQs](FAQs.md) page of commonly asked question on the statistics and code pertaining to Qadabra. 
From 71de05c4c6153384cc455eed02bd0d54004915c3 Mon Sep 17 00:00:00 2001 From: Yang Chen <60239063+yangchen2@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:56:42 -0800 Subject: [PATCH 4/5] f-string format for discriminatory features warning, log number of discrim feats --- qadabra/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qadabra/utils.py b/qadabra/utils.py index 7c13096..56dd0f5 100644 --- a/qadabra/utils.py +++ b/qadabra/utils.py @@ -55,8 +55,8 @@ def _validate_input( discriminating_feats = feat_presence[~feat_presence].index.tolist() if len(discriminating_feats) > 0: - warning_msg = "Some features in the table perfectly discriminate factor groups:\n" + '\n'.join(discriminating_feats) + ".\nAutomatically filtering out these features before running Qadabra..." - print("Number of discriminating features: " + str(len(discriminating_feats))) + logger.warn("Number of discriminating features: " + str(len(discriminating_feats))) + warning_msg = f"Some features in the table perfectly discriminate factor groups. Automatically filtering out {len(discriminating_feats)} features before running Qadabra..." warnings.warn(warning_msg, category=Warning) # Filtering out the discriminating features from the BIOM table From a78121a70f8ba3882109cd0f0660eac7c10e1706 Mon Sep 17 00:00:00 2001 From: Yang Chen <60239063+yangchen2@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:27:08 -0800 Subject: [PATCH 5/5] add install from source instructions --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57584ee..86681a7 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ Importantly, Qadabra focuses on both FDR corrected p-values *and* [feature ranks Please note this software is currently a work in progress. Your patience is appreciated as we continue to develop and enhance its features. Please leave an issue on GitHub should you run into any errors. 
## Installation + +### Option 1: Pip install from [PyPI](https://pypi.org/project/qadabra/0.3.0a1/) ``` pip install qadabra ``` @@ -26,12 +28,31 @@ Qadabra requires the following dependencies: * cython * iow +Check out the [tutorial](tutorial.md) for more in-depth instructions on installation. + + +### Option 2: Install from source (this GitHub repository) +Prerequisites + +Before you begin, ensure you have Git and the necessary build tools installed on your system. + +Clone the Repository +``` +git clone https://github.com/biocore/qadabra.git +``` + +Navigate to the repository root directory, where the `setup.py` file is located, and then install Qadabra in editable mode: +``` +cd qadabra +pip install -e . +``` + ## Usage ### 1. Creating the workflow directory Qadabra can be used on multiple datasets at once. -First, we want to create the workflow directory to perfrom differential abundance with all methods: +First, we want to create the workflow directory to perform differential abundance with all methods: ``` qadabra create-workflow --workflow-dest