Refactor/clean score (#4)

* rename alignment_score to score * make function private, add wrapper * cleanup * type ignore * update tests * add test correct result * update docs
letiziaia · Mar 10, 2024 · 65a80a1 · 65a80a1
1 parent 859e465
commit 65a80a1
Show file tree

Hide file tree

Showing 10 changed files with 177 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -12,6 +12,26 @@ This repository implements an algorithm for quantifying multilayer alignment or
 - `\multilayer_alignment\`: source code
 - `\tests\`: tests for the source code
 
+## Installing the package
+
+### From PyPI
+
+This package can be installed directly from the Python Package Index (PyPI) using `pip` from the command-line interface by executing the following command:
+
+```shell
+$ pip install multilayer-alignment
+```
+
+### Build from source
+
+Alternatively, the package can be installed by first cloning the repository containing the source code and then installing the package locally in a chosen directory:
+
+```shell
+$ git clone git@github.com:letiziaia/multilayer-alignment.git
+$ cd multilayer-alignment
+$ pip install .
+```
+
 ## Setting up the development environment
 
 ![python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)

diff --git a/RUNBOOK.md b/RUNBOOK.md
@@ -25,12 +25,14 @@ From root directory,
 $ python -m unittest discover -v
 ```
 
+## User Guide
+
 ### Given opinion partitions for each of the topics, compute the consensus partition
 
 ```python
-# import needed modules
+# import needed libraries
 >>> import pandas as pd
->>> from multilayer_alignment.consensus import get_consensus_partition
+>>> import multilayer_alignment.consensus as mac
 
 # load the opinion labels to a pandas DataFrame
 >>> df = pd.DataFrame(
@@ -44,21 +46,35 @@ $ python -m unittest discover -v
 )
 
 # get consensus partition
->>> get_consensus_partition(opinions=df)
+>>> mac.get_consensus_partition(opinions=df)
+{
+    "A0_B0_C1": {0},
+    "A0_B1_C0": {1},
+    "A1_B0_C1": {2},
+    "A1_B1_C0": {3}
+}
+
+# this function is equivalent, but might be slower
+>>> mac.get_consensus_partition_recursive(opinions=df)
 {
     "A0_B0_C1": {0},
     "A0_B1_C0": {1},
     "A1_B0_C1": {2},
     "A1_B1_C0": {3}
 }
+
+# get list of labels for the consensus partition
+>>> mac.get_consensus_labels(opinions=df)
+['A0_B0_C1', 'A0_B1_C0', 'A1_B0_C1', 'A1_B1_C0']
 ```
 
-Alternatively:
+### Given opinion partitions for each of the topics, compute the multiway alignment score of all of them
 
 ```python
-# import needed modules
+# import needed libraries
 >>> import pandas as pd
->>> from multilayer_alignment.consensus import get_consensus_partition_recursive
+>>> import multilayer_alignment.consensus as mac
+>>> import multilayer_alignment.score as mas
 
 # load the partition labels to a pandas DataFrame
 >>> df = pd.DataFrame(
@@ -71,12 +87,27 @@ Alternatively:
     }
 )
 
-# get consensus partition
->>> get_consensus_partition_recursive(opinions=df)
-{
-    "A0_B0_C1": {0},
-    "A0_B1_C0": {1},
-    "A1_B0_C1": {2},
-    "A1_B1_C0": {3}
-}
+# get list of labels for the consensus partition
+>>> partition_labels = mac.get_consensus_labels(opinions=df)
+
+# compute 3-way alignment score using AMI (adjusted mutual info score)
+# and adjust with the null model
+>>> mas.multilayer_alignment_score(
+...     df, partition_labels, which_score="ami", adjusted=True,
+... )
+6.40685300762983e-16
+
+# compute 3-way alignment score using NMI (normalized mutual info score)
+# and adjust with the null model
+>>> mas.multilayer_alignment_score(
+...     df, partition_labels, which_score="nmi", adjusted=True,
+... )
+0.0
+
+# if we use NMI (normalized mutual info score) without adjusting it
+# with a null model, the resulting score is inflated
+>>> mas.multilayer_alignment_score(
+...     df, partition_labels, which_score="nmi", adjusted=False,
+... )
+0.6666666666666666
 ```
diff --git a/multilayer_alignment/consensus.py b/multilayer_alignment/consensus.py
@@ -1,8 +1,20 @@
 import pandas as pd
-from typing import Any, Dict, Set, Union
+from typing import Any, Dict, List, Set, Union
 
 
-def get_consensus_labels(consensus_partition: Dict[str, Set[Any]]) -> pd.DataFrame:
+def get_consensus_labels(opinions: Union[pd.DataFrame, pd.Series]) -> List[str]:
+    """
+    :param opinions: pd.DataFrame having one column per topic and one row per individual,
+        where each element a_ij represents the opinion for individual i on topic j
+        and column names are the topic names
+    :return: List[str], a list of consensus group labels (str)
+    """
+    consensus_dict = get_consensus_partition(opinions=opinions)
+    consensus_df = _get_consensus_labels_df(consensus_partition=consensus_dict)
+    return consensus_df.set_index("id").iloc[opinions.index]["label"].to_list()
+
+
+def _get_consensus_labels_df(consensus_partition: Dict[str, Set[Any]]) -> pd.DataFrame:
     """
     :param consensus_partition: a dictionary of consensus group label (str) -> consensus group members (set)
     :return: pd.DataFrame with column 'id' for the element id and column 'label' for the element label

diff --git a/multilayer_alignment/null_models.py b/multilayer_alignment/null_models.py
@@ -11,7 +11,7 @@
 from multiprocessing.pool import Pool
 from tqdm import tqdm
 
-from multilayer_alignment.alignment_score import maximal_alignment_curve
+from multilayer_alignment.score import maximal_alignment_curve  # type: ignore
 
 from multilayer_alignment.utils.logging import logger
 

diff --git a/multilayer_alignment/alignment_score.py → multilayer_alignment/score.py b/multilayer_alignment/alignment_score.py → multilayer_alignment/score.py
@@ -12,7 +12,6 @@
 from multiprocessing.pool import Pool
 from tqdm import tqdm
 
-from multilayer_alignment.consensus import get_consensus_partition
 from multilayer_alignment.consensus import get_consensus_labels
 
 from multilayer_alignment.utils.logging import logger
@@ -121,13 +120,9 @@ def maximal_alignment_curve(
             # keep only items that have labels for all items in l_comb and reindex
             l_comb_df.dropna(inplace=True)
             l_comb_df.reset_index(drop=True, inplace=True)
-            mutual_clusters = get_consensus_partition(l_comb_df)
-            mutual_clusters_labels = get_consensus_labels(mutual_clusters)
-            labels_list = (
-                mutual_clusters_labels.set_index("id")
-                .iloc[l_comb_df.index]["label"]
-                .to_list()
-            )
+
+            # consensus partition labels
+            labels_list = get_consensus_labels(opinions=l_comb_df)
 
             # CRITERIA
             nmi = multilayer_alignment_score(

diff --git a/multilayer_alignment/visualizations.py b/multilayer_alignment/visualizations.py
@@ -8,7 +8,7 @@
 import seaborn as sns  # type: ignore
 import typing
 
-from multilayer_alignment.alignment_score import maximal_alignment_curve
+from multilayer_alignment.score import maximal_alignment_curve  # type: ignore
 from multilayer_alignment.null_models import expected_curve_equal_sized_clusters
 from multilayer_alignment.utils.logging import logger
 

diff --git a/tests/test_compute_maximal_alignment_curve.py b/tests/test_compute_maximal_alignment_curve.py
@@ -2,12 +2,12 @@
 
 import pandas as pd
 
-from multilayer_alignment.alignment_score import maximal_alignment_curve
+from multilayer_alignment.score import maximal_alignment_curve
 
 
 class TestComputeMaximalAlignmentCurve(unittest.TestCase):
     """
-    Test functionality of mutual_clusters.compute_maximal_alignment_curve()
+    Test functionality of score.maximal_alignment_curve()
     ------------
     Example
     ------------

diff --git a/tests/test_compute_multilayer_alignment_score.py b/tests/test_compute_multilayer_alignment_score.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 
-from multilayer_alignment.alignment_score import multilayer_alignment_score
+from multilayer_alignment.score import multilayer_alignment_score
 
 
 class TestComputeMultilayerAlignmentScore(unittest.TestCase):

diff --git a/tests/test_get_consensus_labels.py b/tests/test_get_consensus_labels.py
@@ -7,7 +7,7 @@
 
 class TestGetConsensusLabels(unittest.TestCase):
     """
-    Test functionality of mutual_clusters.get_consensus_labels()
+    Test functionality of consensus.get_consensus_labels()
     ------------
     Example
     ------------
@@ -16,38 +16,50 @@ class TestGetConsensusLabels(unittest.TestCase):
 
     def test_on_empty(self):
         """
-        get_consensus_labels returns a pd.DataFrame
+        get_consensus_labels returns a list
         """
         _a = pd.DataFrame()
         _res0 = get_consensus_labels(_a)
         self.assertIsInstance(
             _res0,
-            pd.DataFrame,
-            f"""get_consensus_labels should return a pd.DataFrame,
+            list,
+            f"""get_consensus_labels should return a list,
             but returned {type(_res0)}""",
         )
         self.assertTrue(
-            _res0.empty,
-            f"""get_consensus_labels called on empty dictionary should return
-            an empty pd.DataFrame, but returned {_res0}""",
+            len(_res0) == 0,
+            f"""get_consensus_labels called on empty pd.DataFrame should return
+            an empty list, but returned {_res0}""",
         )
 
     def test_on_simple_sets(self):
         """
-        get_consensus_labels returns a pd.DataFrame
+        get_consensus_labels returns a list
         """
-        _a = {"A0_B1_C0": {0, 1}, "A1_B0_C1": {2}, "A1_B1_C0": {3}}
+        _a = pd.DataFrame(
+            {
+                "A": [0, 0, 0, 1, 1, 1],
+                "B": [1, 0, 0, 1, 0, 0],
+                "C": [1, 1, 0, 0, 1, 1],
+            }
+        )
         _res0 = get_consensus_labels(_a)
         self.assertIsInstance(
             _res0,
-            pd.DataFrame,
-            f"""get_consensus_labels should return a pd.DataFrame,
+            list,
+            f"""get_consensus_labels should return a list,
             but returned {type(_res0)}""",
         )
         self.assertFalse(
-            _res0.empty,
-            f"""get_consensus_labels called on non-empty dictionary should return
-            a non-empty pd.DataFrame, but returned {_res0}""",
+            len(_res0) == 0,
+            f"""get_consensus_labels called on non-empty pd.DataFrame should return
+            a non-empty list, but returned {_res0}""",
+        )
+        self.assertListEqual(
+            _res0,
+            ["A0_B1_C1", "A0_B0_C1", "A0_B0_C0", "A1_B1_C0", "A1_B0_C1", "A1_B0_C1"],
+            f"""get_consensus_labels called on non-empty pd.DataFrame should return
+            the correct non-empty list, but returned {_res0}""",
         )
 
 

diff --git a/tests/test_get_consensus_labels_df.py b/tests/test_get_consensus_labels_df.py
@@ -0,0 +1,64 @@
+import unittest
+
+import pandas as pd
+
+from multilayer_alignment.consensus import _get_consensus_labels_df
+
+
+class TestGetConsensusLabels(unittest.TestCase):
+    """
+    Test functionality of consensus._get_consensus_labels_df()
+    ------------
+    Example
+    ------------
+    >>> python3 -m unittest -v tests.test_get_consensus_labels_df
+    """
+
+    def test_on_empty(self):
+        """
+        _get_consensus_labels_df returns a pd.DataFrame
+        """
+        _a = dict()
+        _res0 = _get_consensus_labels_df(_a)
+        self.assertIsInstance(
+            _res0,
+            pd.DataFrame,
+            f"""_get_consensus_labels_df should return a pd.DataFrame,
+            but returned {type(_res0)}""",
+        )
+        self.assertTrue(
+            _res0.empty,
+            f"""_get_consensus_labels_df called on empty dictionary should return
+            an empty pd.DataFrame, but returned {_res0}""",
+        )
+
+    def test_on_simple_sets(self):
+        """
+        _get_consensus_labels_df returns a pd.DataFrame
+        """
+        _a = {"A0_B1_C0": {0, 1}, "A1_B0_C1": {2}, "A1_B1_C0": {3}}
+        _res0 = _get_consensus_labels_df(_a)
+        self.assertIsInstance(
+            _res0,
+            pd.DataFrame,
+            f"""_get_consensus_labels_df should return a pd.DataFrame,
+            but returned {type(_res0)}""",
+        )
+        self.assertFalse(
+            _res0.empty,
+            f"""_get_consensus_labels_df called on non-empty dictionary should return
+            a non-empty pd.DataFrame, but returned {_res0}""",
+        )
+        pd.testing.assert_frame_equal(
+            _res0,
+            pd.DataFrame(
+                {
+                    "id": [0, 1, 2, 3],
+                    "label": ["A0_B1_C0", "A0_B1_C0", "A1_B0_C1", "A1_B1_C0"],
+                }
+            ),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()