Skip to content

Commit

Permalink
Refactor/clean score (#4)
Browse files Browse the repository at this point in the history
* rename alignment_score to score

* make function private, add wrapper

* cleanup

* type ignore

* update tests

* add test correct result

* update docs
  • Loading branch information
letiziaia authored Mar 10, 2024
1 parent 859e465 commit 65a80a1
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 43 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,26 @@ This repository implements an algorithm for quantifying multilayer alignment or
- `\multilayer_alignment\`: source code
- `\tests\`: tests for the source code

## Installing the package

### From PyPI

This package can be installed directly from the Python Package Index (PyPI) using `pip` from the command-line interface by executing the following command:

```shell
$ pip install multilayer-alignment
```

### Build from source

Alternatively, the package can be installed by first cloning the repository containing the source code and then installing the package locally in a chosen directory:

```shell
$ git clone git@github.com:letiziaia/multilayer-alignment.git
$ cd multilayer-alignment
$ pip install .
```

## Setting up the development environment

![python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
Expand Down
59 changes: 45 additions & 14 deletions RUNBOOK.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ From root directory,
$ python -m unittest discover -v
```

## User Guide

### Given opinion partitions for each of the topics, compute the consensus partition

```python
# import needed modules
# import needed libraries
>>> import pandas as pd
>>> from multilayer_alignment.consensus import get_consensus_partition
>>> import multilayer_alignment.consensus as mac

# load the opinion labels to a pandas DataFrame
>>> df = pd.DataFrame(
Expand All @@ -44,21 +46,35 @@ $ python -m unittest discover -v
)

# get consensus partition
>>> get_consensus_partition(opinions=df)
>>> mac.get_consensus_partition(opinions=df)
{
"A0_B0_C1": {0},
"A0_B1_C0": {1},
"A1_B0_C1": {2},
"A1_B1_C0": {3}
}

# this function is equivalent, but might be slower
>>> mac.get_consensus_partition_recursive(opinions=df)
{
"A0_B0_C1": {0},
"A0_B1_C0": {1},
"A1_B0_C1": {2},
"A1_B1_C0": {3}
}

# get list of labels for the consensus partition
>>> mac.get_consensus_labels(opinions=df)
['A0_B0_C1', 'A0_B1_C0', 'A1_B0_C1', 'A1_B1_C0']
```

Alternatively:
### Given opinion partitions for each of the topics, compute the multiway alignment score of all of them

```python
# import needed modules
# import needed libraries
>>> import pandas as pd
>>> from multilayer_alignment.consensus import get_consensus_partition_recursive
>>> import multilayer_alignment.consensus as mac
>>> import multilayer_alignment.score as mas

# load the partition labels to a pandas DataFrame
>>> df = pd.DataFrame(
Expand All @@ -71,12 +87,27 @@ Alternatively:
}
)

# get consensus partition
>>> get_consensus_partition_recursive(opinions=df)
{
"A0_B0_C1": {0},
"A0_B1_C0": {1},
"A1_B0_C1": {2},
"A1_B1_C0": {3}
}
# get list of labels for the consensus partition
>>> partition_labels = mac.get_consensus_labels(opinions=df)

# compute 3-way alignment score using AMI (adjusted mutual info score)
# and adjust with the null model
>>> mas.multilayer_alignment_score(
... df, partition_labels, which_score="ami", adjusted=True,
... )
6.40685300762983e-16

# compute 3-way alignment score using NMI (normalized mutual info score)
# and adjust with the null model
>>> mas.multilayer_alignment_score(
... df, partition_labels, which_score="nmi", adjusted=True,
... )
0.0

# if we use NMI (normalized mutual info score) without adjusting it
# with a null model, the resulting score is inflated
>>> mas.multilayer_alignment_score(
... df, partition_labels, which_score="nmi", adjusted=False,
... )
0.6666666666666666
```
16 changes: 14 additions & 2 deletions multilayer_alignment/consensus.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
import pandas as pd
from typing import Any, Dict, Set, Union
from typing import Any, Dict, List, Set, Union


def get_consensus_labels(consensus_partition: Dict[str, Set[Any]]) -> pd.DataFrame:
def get_consensus_labels(opinions: Union[pd.DataFrame, pd.Series]) -> List[str]:
    """
    Return the consensus group label of every individual in ``opinions``.

    :param opinions: pd.DataFrame having one column per topic and one row per individual,
        where each element a_ij represents the opinion for individual i on topic j
        and column names are the topic names
    :return: List[str], a list of consensus group labels (str), looked up
        in the order given by ``opinions.index``
    """
    consensus_dict = get_consensus_partition(opinions=opinions)
    consensus_df = _get_consensus_labels_df(consensus_partition=consensus_dict)
    # After set_index("id") the labels are keyed by element id, so they must be
    # selected by index *label* (.loc) rather than by position (.iloc).
    # Positional selection is only correct when `opinions` has a default
    # 0..n-1 index AND the rows of `consensus_df` happen to be sorted by id.
    # NOTE(review): assumes the "id" column holds the index labels of
    # `opinions` -- confirm against get_consensus_partition.
    labels_by_id = consensus_df.set_index("id")["label"]
    return labels_by_id.loc[opinions.index].to_list()


def _get_consensus_labels_df(consensus_partition: Dict[str, Set[Any]]) -> pd.DataFrame:
"""
:param consensus_partition: a dictionary of consensus group label (str) -> consensus group members (set)
:return: pd.DataFrame with column 'id' for the element id and column 'label' for the element label
Expand Down
2 changes: 1 addition & 1 deletion multilayer_alignment/null_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from multiprocessing.pool import Pool
from tqdm import tqdm

from multilayer_alignment.alignment_score import maximal_alignment_curve
from multilayer_alignment.score import maximal_alignment_curve # type: ignore

from multilayer_alignment.utils.logging import logger

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from multiprocessing.pool import Pool
from tqdm import tqdm

from multilayer_alignment.consensus import get_consensus_partition
from multilayer_alignment.consensus import get_consensus_labels

from multilayer_alignment.utils.logging import logger
Expand Down Expand Up @@ -121,13 +120,9 @@ def maximal_alignment_curve(
# keep only items that have labels for all items in l_comb and reindex
l_comb_df.dropna(inplace=True)
l_comb_df.reset_index(drop=True, inplace=True)
mutual_clusters = get_consensus_partition(l_comb_df)
mutual_clusters_labels = get_consensus_labels(mutual_clusters)
labels_list = (
mutual_clusters_labels.set_index("id")
.iloc[l_comb_df.index]["label"]
.to_list()
)

# consensus partition labels
labels_list = get_consensus_labels(opinions=l_comb_df)

# CRITERIA
nmi = multilayer_alignment_score(
Expand Down
2 changes: 1 addition & 1 deletion multilayer_alignment/visualizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import seaborn as sns # type: ignore
import typing

from multilayer_alignment.alignment_score import maximal_alignment_curve
from multilayer_alignment.score import maximal_alignment_curve # type: ignore
from multilayer_alignment.null_models import expected_curve_equal_sized_clusters
from multilayer_alignment.utils.logging import logger

Expand Down
4 changes: 2 additions & 2 deletions tests/test_compute_maximal_alignment_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

import pandas as pd

from multilayer_alignment.alignment_score import maximal_alignment_curve
from multilayer_alignment.score import maximal_alignment_curve


class TestComputeMaximalAlignmentCurve(unittest.TestCase):
"""
Test functionality of mutual_clusters.compute_maximal_alignment_curve()
Test functionality of score.maximal_alignment_curve()
------------
Example
------------
Expand Down
2 changes: 1 addition & 1 deletion tests/test_compute_multilayer_alignment_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from multilayer_alignment.alignment_score import multilayer_alignment_score
from multilayer_alignment.score import multilayer_alignment_score


class TestComputeMultilayerAlignmentScore(unittest.TestCase):
Expand Down
40 changes: 26 additions & 14 deletions tests/test_get_consensus_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TestGetConsensusLabels(unittest.TestCase):
"""
Test functionality of mutual_clusters.get_consensus_labels()
Test functionality of consensus.get_consensus_labels()
------------
Example
------------
Expand All @@ -16,38 +16,50 @@ class TestGetConsensusLabels(unittest.TestCase):

def test_on_empty(self):
"""
get_consensus_labels returns a pd.DataFrame
get_consensus_labels returns a list
"""
_a = pd.DataFrame()
_res0 = get_consensus_labels(_a)
self.assertIsInstance(
_res0,
pd.DataFrame,
f"""get_consensus_labels should return a pd.DataFrame,
list,
f"""get_consensus_labels should return a list,
but returned {type(_res0)}""",
)
self.assertTrue(
_res0.empty,
f"""get_consensus_labels called on empty dictionary should return
an empty pd.DataFrame, but returned {_res0}""",
len(_res0) == 0,
f"""get_consensus_labels called on empty pd.DataFrame should return
an empty list, but returned {_res0}""",
)

def test_on_simple_sets(self):
"""
get_consensus_labels returns a pd.DataFrame
get_consensus_labels returns a list
"""
_a = {"A0_B1_C0": {0, 1}, "A1_B0_C1": {2}, "A1_B1_C0": {3}}
_a = pd.DataFrame(
{
"A": [0, 0, 0, 1, 1, 1],
"B": [1, 0, 0, 1, 0, 0],
"C": [1, 1, 0, 0, 1, 1],
}
)
_res0 = get_consensus_labels(_a)
self.assertIsInstance(
_res0,
pd.DataFrame,
f"""get_consensus_labels should return a pd.DataFrame,
list,
f"""get_consensus_labels should return a list,
but returned {type(_res0)}""",
)
self.assertFalse(
_res0.empty,
f"""get_consensus_labels called on non-empty dictionary should return
a non-empty pd.DataFrame, but returned {_res0}""",
len(_res0) == 0,
f"""get_consensus_labels called on non-empty pd.DataFrame should return
a non-empty list, but returned {_res0}""",
)
self.assertListEqual(
_res0,
["A0_B1_C1", "A0_B0_C1", "A0_B0_C0", "A1_B1_C0", "A1_B0_C1", "A1_B0_C1"],
f"""get_consensus_labels called on non-empty pd.DataFrame should return
the correct non-empty list, but returned {_res0}""",
)


Expand Down
64 changes: 64 additions & 0 deletions tests/test_get_consensus_labels_df.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import unittest

import pandas as pd

from multilayer_alignment.consensus import _get_consensus_labels_df


class TestGetConsensusLabels(unittest.TestCase):
    """
    Test functionality of consensus._get_consensus_labels_df()
    ------------
    Example
    ------------
    >>> python3 -m unittest -v tests.test_get_consensus_labels_df
    """

    def test_on_empty(self):
        """
        _get_consensus_labels_df called on an empty dictionary
        returns an empty pd.DataFrame
        """
        _a = dict()
        _res0 = _get_consensus_labels_df(_a)
        self.assertIsInstance(
            _res0,
            pd.DataFrame,
            f"""_get_consensus_labels_df should return a pd.DataFrame,
but returned {type(_res0)}""",
        )
        self.assertTrue(
            _res0.empty,
            f"""_get_consensus_labels_df called on empty dictionary should return
an empty pd.DataFrame, but returned {_res0}""",
        )

    def test_on_simple_sets(self):
        """
        _get_consensus_labels_df called on a non-empty dictionary returns a
        non-empty pd.DataFrame with one row per group member, holding the
        member in column 'id' and its group label in column 'label'
        """
        # elements 0 and 1 belong to the same consensus group
        _a = {"A0_B1_C0": {0, 1}, "A1_B0_C1": {2}, "A1_B1_C0": {3}}
        _res0 = _get_consensus_labels_df(_a)
        self.assertIsInstance(
            _res0,
            pd.DataFrame,
            f"""_get_consensus_labels_df should return a pd.DataFrame,
but returned {type(_res0)}""",
        )
        self.assertFalse(
            _res0.empty,
            f"""_get_consensus_labels_df called on non-empty dictionary should return
a non-empty pd.DataFrame, but returned {_res0}""",
        )
        # exact content check: the shared label is repeated for ids 0 and 1
        pd.testing.assert_frame_equal(
            _res0,
            pd.DataFrame(
                {
                    "id": [0, 1, 2, 3],
                    "label": ["A0_B1_C0", "A0_B1_C0", "A1_B0_C1", "A1_B1_C0"],
                }
            ),
        )


# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 65a80a1

Please sign in to comment.