diff --git a/CHANGELOG.md b/CHANGELOG.md index 9213ae5ea5..902076f54f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ ### Backward-incompatible changes [experimental] * `TabularMSA.append` and `TabularMSA.extend` now require one of `minter`, `index`, or `reset_index` to be provided when incorporating new sequences into an MSA. Previous behavior was to auto-increment the index labels if `minter` and `index` weren't provided and the MSA had a default integer index, otherwise error. Use `reset_index=True` to obtain the previous behavior in a more explicit way. +* `skbio.stats.composition.ancom` now returns two `pd.DataFrame` objects, where it previously returned one. The first contains the ANCOM test results, as before, and the second contains percentile abundances of each feature in each group. The specific percentiles that are computed and returned is controlled by the new `percentiles` parameter to `skbio.stats.composition.ancom`. In the future, this second `pd.DataFrame` will not be returned by this function, but will be available through the [contingency table API](https://github.com/biocore/scikit-bio/issues/848). ([#1293](https://github.com/biocore/scikit-bio/issues/1293)) +* `skbio.stats.composition.ancom` now performs multiple comparisons correction by default. The previous behavior of not performing multiple comparisons correction can be achieved by passing ``multiple_comparisons_correction=None``. ### Bug fixes * Fixed row and column names to `biplot_scores` in the `OrdinationResults` object from `skbio.stats.ordination`. This fix affect the `cca` and `rda` methods. ([#1322](https://github.com/biocore/scikit-bio/issues/1322)) diff --git a/skbio/stats/composition.py b/skbio/stats/composition.py index e1bb00fff8..b7ccd83596 100644 --- a/skbio/stats/composition.py +++ b/skbio/stats/composition.py @@ -102,6 +102,7 @@ import numpy as np import pandas as pd import scipy.stats +import skbio.util from skbio.util._decorator import experimental @@ -643,16 +644,16 @@ def ancom(table, grouping, alpha=0.05, tau=0.02, theta=0.1, - multiple_comparisons_correction=None, + multiple_comparisons_correction='holm-bonferroni', significance_test=None, - percentiles=[0.0, 25.0, 50.0, 75.0, 100.0]): + percentiles=(0.0, 25.0, 50.0, 75.0, 100.0)): r""" Performs a differential abundance test using ANCOM. This is done by calculating pairwise log ratios between all features and performing a significance test to determine if there is a significant difference in feature ratios with respect to the variable of interest. - In an experiment with only two treatments, this test tests the following + In an experiment with only two treatments, this tests the following hypothesis for feature :math:`i` .. math:: @@ -697,6 +698,10 @@ def ancom(table, grouping, classes. This function must be able to accept at least two 1D array_like arguments of floats and returns a test statistic and a p-value. By default ``scipy.stats.f_oneway`` is used. + percentiles : iterable of floats, optional + Percentile abundances to return for each feature in each group. By + default, will return the minimum, 25th percentile, median, 75th + percentile, and maximum abundances for each feature in each group. Returns ------- @@ -709,6 +714,13 @@ def ancom(table, grouping, `"reject"` indicates if feature is significantly different or not. + pd.DataFrame + A table of features and their percentile abundances in each group. If + ``percentiles`` is empty, this will be an empty ``pd.DataFrame``. 
The + rows in this object will be features, and the columns will be a + multi-index where the first index is the percentile, and the second + index is the group. + See Also -------- multiplicative_replacement @@ -733,9 +745,11 @@ def ancom(table, grouping, This method cannot handle any zero counts as input, since the logarithm of zero cannot be computed. While this is an unsolved problem, many - studies have shown promising results by replacing the zeros with pseudo - counts. This can be also be done via the ``multiplicative_replacement`` - method. + studies, including [2]_, have shown promising results by adding + pseudocounts to all values in the matrix. In [2]_, a pseudocount of 0.001 + was used, though the authors note that a pseudocount of 1.0 may also be + useful. Zero counts can also be addressed using the + ``multiplicative_replacement`` method. References ---------- @@ -752,7 +766,8 @@ def ancom(table, grouping, >>> from skbio.stats.composition import ancom >>> import pandas as pd - Now let's load in a pd.DataFrame with 6 samples and 7 unknown bacteria: + Now let's load in a DataFrame with 6 samples and 7 features (e.g., + these may be bacterial OTUs): >>> table = pd.DataFrame([[12, 11, 10, 10, 10, 10, 10], ... [9, 11, 12, 10, 10, 10, 10], @@ -760,25 +775,28 @@ def ancom(table, grouping, ... [22, 21, 9, 10, 10, 10, 10], ... [20, 22, 10, 10, 13, 10, 10], ... [23, 21, 14, 10, 10, 10, 10]], - ... index=['s1','s2','s3','s4','s5','s6'], - ... columns=['b1','b2','b3','b4','b5','b6','b7']) + ... index=['s1', 's2', 's3', 's4', 's5', 's6'], + ... columns=['b1', 'b2', 'b3', 'b4', 'b5', 'b6', + ... 'b7']) - Then create a grouping vector. In this scenario, there - are only two classes, and suppose these classes correspond to the - treatment due to a drug and a control. The first three samples - are controls and the last three samples are treatments. + Then create a grouping vector. In this example, there is a treatment group + and a placebo group. - >>> grouping = pd.Series([0, 0, 0, 1, 1, 1], - ... index=['s1','s2','s3','s4','s5','s6']) + >>> grouping = pd.Series(['treatment', 'treatment', 'treatment', + ... 'placebo', 'placebo', 'placebo'], + ... index=['s1', 's2', 's3', 's4', 's5', 's6']) - Now run ``ancom`` and see if there are any features that have any - significant differences between the treatment and the control. + Now run ``ancom`` to determine if there are any features that are + significantly different in abundance between the treatment and the placebo + groups. The first DataFrame that is returned contains the ANCOM test + results, and the second contains the percentile abundance data for each + feature in each group. - >>> results = ancom(table, grouping) - >>> results['W'] + >>> ancom_df, percentile_df = ancom(table, grouping) + >>> ancom_df['W'] b1 0 b2 4 - b3 1 + b3 0 b4 1 b5 1 b6 0 @@ -788,10 +806,13 @@ def ancom(table, grouping, The W-statistic is the number of features that a single feature is tested to be significantly different against. In this scenario, `b2` was detected to have significantly different abundances compared to four of the other - species. To summarize the results from the W-statistic, let's take a look - at the results from the hypothesis test: + features. To summarize the results from the W-statistic, let's take a look + at the results from the hypothesis test. 
The `Reject null hypothesis` + column in the table indicates whether the null hypothesis was rejected, + and that a feature was therefore observed to be differentially abundant + across the groups. - >>> results['reject'] + >>> ancom_df['Reject null hypothesis'] b1 False b2 True b3 False @@ -799,10 +820,42 @@ def ancom(table, grouping, b5 False b6 False b7 False - Name: reject, dtype: bool - - From this we can conclude that only `b2` was significantly - different between the treatment and the control. + Name: Reject null hypothesis, dtype: bool + + From this we can conclude that only `b2` was significantly different in + abundance between the treatment and the placebo. We still don't know, for + example, in which group `b2` was more abundant. We therefore may next be + interested in comparing the abundance of `b2` across the two groups. + We can do that using the second DataFrame that was returned. Here we + compare the median (50th percentile) abundance of `b2` in the treatment and + placebo groups: + + >>> percentile_df[50.0].loc['b2'] + Group + placebo 21.0 + treatment 11.0 + Name: b2, dtype: float64 + + We can also look at a full five-number summary for ``b2`` in the treatment + and placebo groups: + + >>> percentile_df.loc['b2'] # doctest: +NORMALIZE_WHITESPACE + Percentile Group + 0.0 placebo 21.0 + 25.0 placebo 21.0 + 50.0 placebo 21.0 + 75.0 placebo 21.5 + 100.0 placebo 22.0 + 0.0 treatment 11.0 + 25.0 treatment 11.0 + 50.0 treatment 11.0 + 75.0 treatment 11.0 + 100.0 treatment 11.0 + Name: b2, dtype: float64 + + Taken together, these data tell us that `b2` is present in significantly + higher abundance in the placebo group samples than in the treatment group + samples. """ if not isinstance(table, pd.DataFrame): @@ -814,7 +867,7 @@ def ancom(table, grouping, if np.any(table <= 0): raise ValueError('Cannot handle zeros or negative values in `table`. ' - 'Use pseudo counts or ``multiplicative_replacement``.' + 'Use pseudocounts or ``multiplicative_replacement``.' ) if not 0 < alpha < 1: @@ -838,9 +891,20 @@ def ancom(table, grouping, if (table.isnull()).any().any(): raise ValueError('Cannot handle missing values in `table`.') - input_grouping = grouping.copy() - groups, _grouping = np.unique(grouping, return_inverse=True) - grouping = pd.Series(_grouping, index=grouping.index) + percentiles = list(percentiles) + for percentile in percentiles: + if not 0.0 <= percentile <= 100.0: + raise ValueError('Percentiles must be in the range [0, 100], %r ' + 'was provided.' % percentile) + + duplicates = skbio.util.find_duplicates(percentiles) + if duplicates: + formatted_duplicates = ', '.join(repr(e) for e in duplicates) + raise ValueError('Percentile values must be unique. The following' + ' value(s) were duplicated: %s.' % + formatted_duplicates) + + groups = np.unique(grouping) num_groups = len(groups) if num_groups == len(grouping): @@ -867,22 +931,6 @@ def ancom(table, grouping, raise ValueError('`table` index and `grouping` ' 'index must be consistent.') - # Compute DataFrame of mean/std abundances for all features on a - # per category basis. 
- cat_values = input_grouping.values - cs = np.unique(cat_values) - cat_dists = {k: mat[cat_values == k] for k in cs} - cat_percentiles = [] - for percentile in percentiles: - data = {k: np.percentile(v, percentile, axis=0) - for k, v in cat_dists.items()} - data = pd.DataFrame.from_dict(data) - data.index = mat.columns - data.columns = ['%s: %r percentile' % (e, percentile) - for e in data.columns] - cat_percentiles.append(data) - cat_percentiles = pd.concat(cat_percentiles, axis=1) - n_feat = mat.shape[1] _logratio_mat = _log_compare(mat.values, cats.values, significance_test) @@ -913,10 +961,28 @@ def ancom(table, grouping, else: nu = cutoff[4] reject = (W >= nu*n_feat) - labs = mat.columns - ancom_df = pd.DataFrame({'W': pd.Series(W, index=labs), - 'reject': pd.Series(reject, index=labs)}) - return pd.concat([ancom_df, cat_percentiles], axis=1) + + feat_ids = mat.columns + ancom_df = pd.DataFrame( + {'W': pd.Series(W, index=feat_ids), + 'Reject null hypothesis': pd.Series(reject, index=feat_ids)}) + + if len(percentiles) == 0: + return ancom_df, pd.DataFrame() + else: + data = [] + columns = [] + for group in groups: + feat_dists = mat[grouping == group] + for percentile in percentiles: + columns.append((percentile, group)) + data.append(np.percentile(feat_dists, percentile, axis=0)) + columns = pd.MultiIndex.from_tuples(columns, + names=['Percentile', 'Group']) + percentile_df = pd.DataFrame( + np.asarray(data).T, columns=columns, index=feat_ids) + return ancom_df, percentile_df + def _holm_bonferroni(p): """ Performs Holm-Bonferroni correction for pvalues diff --git a/skbio/stats/tests/test_composition.py b/skbio/stats/tests/test_composition.py index 554fb18acc..6dd2f122c3 100644 --- a/skbio/stats/tests/test_composition.py +++ b/skbio/stats/tests/test_composition.py @@ -546,12 +546,197 @@ def test_ancom_basic_counts(self): assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) - exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]), - 'reject': np.array([True, True, False, False, - False, False, False], - dtype=bool)}) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) + + def test_ancom_basic_counts_shuffled_order(self): + test_table = pd.DataFrame(self.table1) + original_table = copy.deepcopy(test_table) + test_cats = pd.Series(self.cats1) + # reverse the order of the grouping to confirm that + # order doesn't matter. 
+ test_cats = test_cats.reindex(reversed(test_cats.index)) + original_cats = copy.deepcopy(test_cats) + result = ancom(test_table, + test_cats, + multiple_comparisons_correction=None) + # Test to make sure that the input table hasn't be altered + assert_data_frame_almost_equal(original_table, test_table) + # Test to make sure that the input table hasn't be altered + pdt.assert_series_equal(original_cats, test_cats) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) + + def test_ancom_percentiles(self): + table = pd.DataFrame([[12, 11], + [9, 11], + [1, 11], + [22, 100], + [20, 53], + [23, 1]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1', 'b2']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + + percentiles = [0.0, 25.0, 50.0, 75.0, 100.0] + groups = ['a', 'b'] + tuples = [(p, g) for g in groups for p in percentiles] + exp_mi = pd.MultiIndex.from_tuples(tuples, + names=['Percentile', 'Group']) + exp_data = np.array( + [[1.0, 11.0], [5.0, 11.0], [9.0, 11.0], [10.5, 11.0], [12.0, 11.0], + [20.0, 1.0], [21.0, 27.0], [22.0, 53.0], [22.5, 76.5], + [23.0, 100.0]]) + exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1', 'b2']) + + result = ancom(table, grouping, percentiles=percentiles)[1] + assert_data_frame_almost_equal(result, exp) + + def test_ancom_percentiles_alt_categories(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'c', 'b', 'b', 'c'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + + percentiles = [0.0, 25.0, 50.0, 75.0, 100.0] + groups = ['a', 'b', 'c'] + tuples = [(p, g) for g in groups for p in percentiles] + exp_mi = pd.MultiIndex.from_tuples(tuples, + names=['Percentile', 'Group']) + exp_data = np.array([[9.0], [9.75], [10.5], [11.25], [12.0], # a + [20.0], [20.5], [21.0], [21.5], [22.0], # b + [1.0], [6.5], [12.0], [17.5], [23.0]]) # c + exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) + + result = ancom(table, grouping, percentiles=percentiles)[1] + assert_data_frame_almost_equal(result, exp) + + def test_ancom_alt_percentiles(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + + percentiles = [42.0, 50.0] + groups = ['a', 'b'] + tuples = [(p, g) for g in groups for p in percentiles] + exp_mi = pd.MultiIndex.from_tuples(tuples, + names=['Percentile', 'Group']) + exp_data = np.array([[7.71999999], [9.0], # a + [21.68], [22.0]]) # b + exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) + + result = ancom(table, grouping, percentiles=percentiles)[1] assert_data_frame_almost_equal(result, exp) + def test_ancom_percentile_order_unimportant(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + # order of percentiles in unimportant after sorting + result1 = ancom(table, grouping, percentiles=[50.0, 42.0])[1] + result2 = ancom(table, grouping, percentiles=[42.0, 50.0])[1] + assert_data_frame_almost_equal( + 
result1.sort_index(axis=1), result2.sort_index(axis=1)) + + def test_ancom_percentiles_iterator(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + + percentiles = [42.0, 50.0] + groups = ['a', 'b'] + tuples = [(p, g) for g in groups for p in percentiles] + exp_mi = pd.MultiIndex.from_tuples(tuples, + names=['Percentile', 'Group']) + exp_data = np.array([[7.71999999], [9.0], # a + [21.68], [22.0]]) # b + exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) + + result = ancom(table, grouping, percentiles=iter(percentiles))[1] + assert_data_frame_almost_equal(result, exp) + + def test_ancom_no_percentiles(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + result = ancom(table, grouping, percentiles=[])[1] + assert_data_frame_almost_equal(result, pd.DataFrame()) + + def test_ancom_percentile_out_of_range(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + with self.assertRaises(ValueError): + ancom(table, grouping, percentiles=[-1.0]) + with self.assertRaises(ValueError): + ancom(table, grouping, percentiles=[100.1]) + with self.assertRaises(ValueError): + ancom(table, grouping, percentiles=[10.0, 3.0, 101.0, 100]) + + def test_ancom_duplicate_percentiles(self): + table = pd.DataFrame([[12], + [9], + [1], + [22], + [20], + [23]], + index=['s1', 's2', 's3', 's4', 's5', 's6'], + columns=['b1']) + grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], + index=['s1', 's2', 's3', 's4', 's5', 's6']) + with self.assertRaises(ValueError): + ancom(table, grouping, percentiles=[10.0, 10.0]) + def test_ancom_basic_proportions(self): # Converts from counts to proportions test_table = pd.DataFrame(closure(self.table1)) @@ -565,11 +750,12 @@ def test_ancom_basic_proportions(self): assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) - exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]), - 'reject': np.array([True, True, False, False, - False, False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_multiple_groups(self): test_table = pd.DataFrame(self.table4) @@ -581,135 +767,152 @@ def test_ancom_multiple_groups(self): assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) - exp = pd.DataFrame({'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), - 'reject': np.array([True, True, False, False, - True, False, False, False, - False], dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), + 'Reject null hypothesis': np.array([True, True, False, False, + True, False, False, False, + False], 
dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_noncontiguous(self): result = ancom(self.table5, self.cats5, multiple_comparisons_correction=None) - exp = pd.DataFrame({'W': np.array([6, 2, 2, 2, 2, 6, 2]), - 'reject': np.array([True, False, False, False, - False, True, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([6, 2, 2, 2, 2, 6, 2]), + 'Reject null hypothesis': np.array([True, False, False, False, + False, True, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_unbalanced(self): result = ancom(self.table6, self.cats6, multiple_comparisons_correction=None) - exp = pd.DataFrame({'W': np.array([5, 3, 3, 2, 2, 5, 2]), - 'reject': np.array([True, False, False, False, - False, True, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([5, 3, 3, 2, 2, 5, 2]), + 'Reject null hypothesis': np.array([True, False, False, False, + False, True, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_letter_categories(self): result = ancom(self.table7, self.cats7, multiple_comparisons_correction=None) - exp = pd.DataFrame({'W': np.array([5, 3, 3, 2, 2, 5, 2]), - 'reject': np.array([True, False, False, False, - False, True, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([5, 3, 3, 2, 2, 5, 2]), + 'Reject null hypothesis': np.array([True, False, False, False, + False, True, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_multiple_comparisons(self): result = ancom(self.table1, self.cats1, multiple_comparisons_correction='holm-bonferroni', significance_test=scipy.stats.mannwhitneyu) - exp = pd.DataFrame({'W': np.array([0]*7), - 'reject': np.array([False]*7, dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([0]*7), + 'Reject null hypothesis': np.array([False]*7, dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_alternative_test(self): result = ancom(self.table1, self.cats1, multiple_comparisons_correction=None, significance_test=scipy.stats.ttest_ind) - exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]), - 'reject': np.array([True, True, False, False, - False, False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_normal_data(self): result = ancom(self.table2, self.cats2, multiple_comparisons_correction=None, significance_test=scipy.stats.ttest_ind) - exp = pd.DataFrame({'W': np.array([8, 8, 3, 3, - 8, 3, 3, 3, 3]), - 'reject': np.array([True, True, False, False, - True, False, False, - False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([8, 8, 3, 3, 8, 3, 3, 3, 3]), + 'Reject null hypothesis': np.array([True, True, False, False, + True, False, False, + False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_basic_counts_swapped(self): result = ancom(self.table8, self.cats8) - exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]), - 'reject': np.array([True, True, False, False, - False, False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, 
exp) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_no_signal(self): result = ancom(self.table3, self.cats3, multiple_comparisons_correction=None) - exp = pd.DataFrame({'W': np.array([0]*7), - 'reject': np.array([False]*7, dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([0]*7), + 'Reject null hypothesis': np.array([False]*7, dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_tau(self): - exp1 = pd.DataFrame({'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), - 'reject': np.array([True, False, False, False, - False, False, False, False, - False], dtype=bool)}) - exp2 = pd.DataFrame({'W': np.array([17, 17, 5, 6, 16, 5, 7, 5, - 4, 5, 8, 4, 5, 16, 5, 11, 4, 6]), - 'reject': np.array([True, True, False, False, - True, False, False, False, - False, False, False, False, - False, True, False, False, - False, False], dtype=bool)}) - exp3 = pd.DataFrame({'W': np.array([16, 16, 17, 10, 17, 16, 16, - 15, 15, 15, 13, 10, 10, 10, - 9, 9, 9, 9]), - 'reject': np.array([True, True, True, False, - True, True, True, True, - True, True, True, False, - False, False, False, False, - False, False], dtype=bool)}) - - result1 = ancom(self.table4, self.cats4, tau=0.25) - result2 = ancom(self.table9, self.cats9, tau=0.02) - result3 = ancom(self.table10, self.cats10, tau=0.02) - - assert_data_frame_almost_equal(result1, exp1) - assert_data_frame_almost_equal(result2, exp2) - assert_data_frame_almost_equal(result3, exp3) + exp1 = pd.DataFrame( + {'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), + 'Reject null hypothesis': np.array([True, False, False, False, + False, False, False, False, + False], dtype=bool)}) + exp2 = pd.DataFrame( + {'W': np.array([17, 17, 5, 6, 16, 5, 7, 5, + 4, 5, 8, 4, 5, 16, 5, 11, 4, 6]), + 'Reject null hypothesis': np.array([True, True, False, False, + True, False, False, False, + False, False, False, False, + False, True, False, False, + False, False], dtype=bool)}) + exp3 = pd.DataFrame( + {'W': np.array([16, 16, 17, 10, 17, 16, 16, + 15, 15, 15, 13, 10, 10, 10, + 9, 9, 9, 9]), + 'Reject null hypothesis': np.array([True, True, True, False, + True, True, True, True, + True, True, True, False, + False, False, False, False, + False, False], dtype=bool)}) + + result1 = ancom(self.table4, self.cats4, + multiple_comparisons_correction=None, tau=0.25) + result2 = ancom(self.table9, self.cats9, + multiple_comparisons_correction=None, tau=0.02) + result3 = ancom(self.table10, self.cats10, + multiple_comparisons_correction=None, tau=0.02) + + assert_data_frame_almost_equal(result1[0], exp1) + assert_data_frame_almost_equal(result2[0], exp2) + assert_data_frame_almost_equal(result3[0], exp3) def test_ancom_theta(self): result = ancom(self.table1, self.cats1, theta=0.3) - exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]), - 'reject': np.array([True, True, False, False, - False, False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + exp = pd.DataFrame( + {'W': np.array([5, 5, 2, 2, 2, 2, 2]), + 'Reject null hypothesis': np.array([True, True, False, False, + False, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_alpha(self): - result = ancom(self.table1, self.cats1, alpha=0.5) - exp = pd.DataFrame({'W': np.array([6, 6, 4, 5, 5, 4, 2]), - 'reject': np.array([True, True, False, True, 
- True, False, False], - dtype=bool)}) - assert_data_frame_almost_equal(result, exp) + result = ancom(self.table1, self.cats1, + multiple_comparisons_correction=None, alpha=0.5) + exp = pd.DataFrame( + {'W': np.array([6, 6, 4, 5, 5, 4, 2]), + 'Reject null hypothesis': np.array([True, True, False, True, + True, False, False], + dtype=bool)}) + assert_data_frame_almost_equal(result[0], exp) def test_ancom_fail_type(self): with self.assertRaises(TypeError):
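
Since the diff above changes `ancom`'s public interface, here is a brief usage sketch of the updated API: the two-DataFrame return value, the new `percentiles` parameter, and the Holm-Bonferroni correction that is now applied by default. The table and grouping are borrowed from the new `test_ancom_percentiles` test case; everything else is illustrative only.

```python
import pandas as pd
from skbio.stats.composition import ancom

# Data from the new test_ancom_percentiles test case: two features (b1, b2)
# measured in two groups of three samples each.
table = pd.DataFrame([[12, 11],
                      [9, 11],
                      [1, 11],
                      [22, 100],
                      [20, 53],
                      [23, 1]],
                     index=['s1', 's2', 's3', 's4', 's5', 's6'],
                     columns=['b1', 'b2'])
grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'],
                     index=['s1', 's2', 's3', 's4', 's5', 's6'])

# ancom now returns two DataFrames: the test results (the W statistic and the
# 'Reject null hypothesis' column) and the per-group percentile abundances.
# Holm-Bonferroni correction is applied by default; pass
# multiple_comparisons_correction=None to recover the previous behavior.
ancom_df, percentile_df = ancom(table, grouping,
                                percentiles=[0.0, 25.0, 50.0, 75.0, 100.0])

print(ancom_df[['W', 'Reject null hypothesis']])

# percentile_df is indexed by feature with a (Percentile, Group) column
# MultiIndex. Per the expectations in test_ancom_percentiles, the median
# abundance of b1 is 9.0 in group 'a' and 22.0 in group 'b'.
print(percentile_df[50.0].loc['b1'])

# The full five-number summary for one feature across both groups:
print(percentile_df.loc['b1'])

# Passing percentiles=[] skips the summary; the second return value is then
# an empty DataFrame.
_, empty_df = ancom(table, grouping, percentiles=[])
```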
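For readers skimming the diff, the following is a condensed, standalone restatement of how the new percentile summary (the second return value) is assembled: features as rows and a `(Percentile, Group)` column `MultiIndex`. It mirrors the logic added to `composition.py`; the `percentile_frame_sketch` name and the tiny example data are ours, not part of the library.

```python
import numpy as np
import pandas as pd

def percentile_frame_sketch(mat, grouping, percentiles):
    """Assemble a (Percentile, Group) summary like ancom's second return value.

    Condensed restatement of the logic added in composition.py; this helper
    is illustrative and is not part of scikit-bio.
    """
    data, columns = [], []
    for group in np.unique(grouping):
        feat_dists = mat[grouping == group]  # rows belonging to this group
        for percentile in percentiles:
            columns.append((percentile, group))
            data.append(np.percentile(feat_dists, percentile, axis=0))
    columns = pd.MultiIndex.from_tuples(columns,
                                        names=['Percentile', 'Group'])
    return pd.DataFrame(np.asarray(data).T, columns=columns,
                        index=mat.columns)

# Tiny illustration: two features, two groups of two samples each.
mat = pd.DataFrame([[1, 10], [3, 10], [7, 20], [9, 40]],
                   index=['s1', 's2', 's3', 's4'], columns=['b1', 'b2'])
grouping = pd.Series(['a', 'a', 'b', 'b'], index=mat.index)
print(percentile_frame_sketch(mat, grouping, [0.0, 50.0, 100.0]))
```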
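The CHANGELOG entry notes that `ancom` now performs multiple comparisons correction by default (`'holm-bonferroni'`). As a conceptual aid only, here is a standalone NumPy sketch of the Holm-Bonferroni step-down adjustment; it is not the `_holm_bonferroni` helper referenced in this diff, whose internals may differ.

```python
import numpy as np

def holm_bonferroni_sketch(pvals):
    """Illustrative Holm-Bonferroni step-down adjustment (not skbio's code).

    Each sorted p-value p_(i) is scaled by (m - i), a running maximum
    enforces monotonicity, and the result is clipped at 1.
    """
    p = np.asarray(pvals, dtype=float)
    m = len(p)
    order = np.argsort(p)
    adjusted_sorted = np.minimum(
        1.0, np.maximum.accumulate(p[order] * (m - np.arange(m))))
    # Map the adjusted values back to the original order of the inputs.
    adjusted = np.empty(m)
    adjusted[order] = adjusted_sorted
    return adjusted

print(holm_bonferroni_sketch([0.01, 0.04, 0.03, 0.20]))
# -> [0.04, 0.09, 0.09, 0.20]
```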
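The revised Notes section recommends pseudocounts or `multiplicative_replacement` for tables containing zeros, which `ancom` rejects outright. Below is a minimal sketch of both options, assuming a hypothetical count table with zeros; the pseudocount of 1 follows the note that 1.0 may also be useful (the cited study used 0.001), and the choice is up to the analyst.

```python
import pandas as pd
from skbio.stats.composition import ancom, multiplicative_replacement

# A hypothetical count table containing zeros, which ancom cannot accept
# directly because log ratios of zero are undefined.
counts = pd.DataFrame([[0, 5, 20], [2, 0, 18], [3, 4, 0],
                       [10, 2, 1], [12, 1, 2], [11, 3, 1]],
                      index=['s1', 's2', 's3', 's4', 's5', 's6'],
                      columns=['b1', 'b2', 'b3'])
grouping = pd.Series(['x', 'x', 'x', 'y', 'y', 'y'], index=counts.index)

# Option 1: add a pseudocount to every cell before running ancom.
ancom_df, percentile_df = ancom(counts + 1, grouping)

# Option 2: replace zeros with multiplicative_replacement, which returns
# proportions as a NumPy array, so wrap it back into a labeled DataFrame.
props = pd.DataFrame(multiplicative_replacement(counts),
                     index=counts.index, columns=counts.columns)
ancom_df2, percentile_df2 = ancom(props, grouping)
```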