diff --git a/README.md b/README.md index 2fe9401d..3430d905 100644 --- a/README.md +++ b/README.md @@ -244,6 +244,75 @@ pipenv run python3 gatorgrouper_cli.py --debug If neither of these flags are set, logging will only be shown if an error occurs. +### Kernighan-Lin Grouping Method + +The Kernighan-Lin algorithm creates a k-way graph partition that determines the +grouping of students based on their preferences for working with other students +and compatibility with other classmates. The graph recognizes student compatibility +through numerical weights (indicators of student positional relationship on the graph). +This grouping method allows for a systematic approach and balanced number of student +groups capable of tackling different types of work. Students should enter student +name, number of groups, objective weights (optional), objective measures (optional), +students preferred to work with (optional), preference weight (optional), +and preference weight match (optional). Note that the number of groups must be at +least 2 and be a power of 2, i.e. 2, 4, 8... + +NOTE: `--method graph` and `--num-group` are required to create groups. + +The `graph` method must be selected to generate groups through graph +partitioning. 
To generate groups using the Kernighan-Lin grouping algorithm, use +the flag `--method graph` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER +``` + +To load student preferences from a file, use the flag `--preferences` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --preferences filepath +``` + +To indicate student preference weight use the flag `--preferences-weight` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --preferences filepath --preferences-weight PREFERENCES_WEIGHT +``` + +To indicate preference weight match use the flag `--preferences-weight-match` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --preferences filepath --preferences-weight PREFERENCES_WEIGHT +--preferences-weight-match PREFERENCES_WEIGHT_MATCH +``` + +To add objective measures use the flag `--objective-measures` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --objective-measures LIST --objective-weights LIST +``` + +To add objective weights use the flag `--objective-weights` + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --objective-measures LIST --objective-weights LIST +``` + +A command line using all arguments would be: + +```shell +pipenv run python gatorgrouper_cli.py --file filepath --method graph +--num-group NUMBER --preferences filepath --preferences-weight PREFERENCES_WEIGHT +--preferences-weight-match PREFERENCES_WEIGHT_MATCH --objective-measures LIST +--objective-weights LIST +``` + ### Full Example ```shell diff --git a/gatorgrouper/utils/constants.py b/gatorgrouper/utils/constants.py index dfd26362..4ae57139 100644 --- a/gatorgrouper/utils/constants.py +++ b/gatorgrouper/utils/constants.py @@ -8,6 +8,11 @@ DEFAULT_GRPSIZE = 3 DEFAULT_NUMGRP = 3 
DEFAULT_ABSENT = "" +DEFAULT_PREFERENCES = None +DEFAULT_PREFERENCES_WEIGHT = 1.1 +DEFAULT_PREFERENCES_WEIGHT_MATCH = 1.3 +DEFAULT_OBJECTIVE_WEIGHTS = None +DEFAULT_OBJECTIVE_MEASURES = None # assertion NONE = "" diff --git a/gatorgrouper/utils/group_graph.py b/gatorgrouper/utils/group_graph.py index caf30426..16e7f2ec 100644 --- a/gatorgrouper/utils/group_graph.py +++ b/gatorgrouper/utils/group_graph.py @@ -10,14 +10,18 @@ def recursive_kl(graph: Graph, numgrp=2) -> List[Set[int]]: """ - Recursively use Kernighan-Lin algorithm to create a k-way graph partition + Recursively use the Kernighan-Lin algorithm to create a k-way graph partition. + This function will either return two groups or more than two depending on the + value of numgrp. Each group generated is different from the previous. """ power = log(numgrp, 2) if power != int(power) or power < 1: raise ValueError("numgrp must be a power of 2 and at least 2.") + # For a group of two bisect it and return two groups if numgrp == 2: # Base case for recursion: use Kernighan-Lin to create 2 groups return list(kernighan_lin_bisection(graph)) + # For the next group of two divide numgrp by 2 next_numgrp = numgrp / 2 groups = [] for subset in kernighan_lin_bisection(graph): @@ -31,10 +35,11 @@ def total_cut_size(graph: Graph, partition: List[int]) -> float: Computes the sum of weights of all edges between different subsets in the partition """ cut = 0.0 + # Edges are added from the nodes on the graph, creating subsets for i, subset1 in enumerate(partition): for subset2 in partition[i:]: + # Sum of weights added from all subsets and set equal to cut cut += cut_size(graph, subset1, T=subset2) - print(subset1, subset2, cut) return cut @@ -58,10 +63,13 @@ def compatibility( If no measures are specified, "avg" is used as a default. 
""" if not len(a) == len(b): - raise Exception("Tuples passed to compatibility() must have same size") + # Raise an exception notice if student tuples don't match + raise Exception("Tuples passed to compatibility() must have same size.") if objective_weights is None: + # Return length objective_weights = [1] * len(a) if objective_measures is None: + # Default to return average if set equal to None objective_measures = ["avg"] * len(a) scores = [] for a_score, b_score, weight, measure in zip( @@ -80,6 +88,8 @@ def compatibility( compat = int(a_score == b_score) elif measure == "diff": compat = abs(a_score - b_score) + else: + raise Exception("Invalid measure") # Scale the compatibility of a[i] and b[i] using the i-th objective weight scores.append(compat * weight) @@ -96,7 +106,8 @@ def group_graph_partition( preferences_weight_match=1.3, ): """ - Form groups using recursive Kernighan-Lin algorithm + Form groups using recursive Kernighan-Lin algorithm by reading in students list + and weight list and partitioning the vertices. 
""" # Read in students list and the weight list students = [item[0] for item in inputlist] @@ -133,17 +144,3 @@ def group_graph_partition( for p in partition: groups.append([inputlist[i] for i in p]) return groups - - -if __name__ == "__main__": - student_data = [ - ["one", 0, 0], - ["two", 0, 0.5], - ["three", 0.5, 0], - ["four", 0.75, 0.75], - ["five", 0.8, 0.1], - ["six", 0, 1], - ["seven", 1, 0], - ["eight", 1, 1], - ] - student_groups = group_graph_partition(student_data, 4) diff --git a/gatorgrouper/utils/parse_arguments.py b/gatorgrouper/utils/parse_arguments.py index cd52cc45..82f5b45c 100644 --- a/gatorgrouper/utils/parse_arguments.py +++ b/gatorgrouper/utils/parse_arguments.py @@ -66,6 +66,46 @@ def parse_arguments(args): required=False, ) + gg_parser.add_argument( + "--preferences", + help="Preferences of students for graph algorithm", + type=str, + default=constants.DEFAULT_PREFERENCES, + required=False, + ) + + gg_parser.add_argument( + "--preferences-weight", + help="Prefered weights", + type=float, + default=constants.DEFAULT_PREFERENCES_WEIGHT, + required=False, + ) + + gg_parser.add_argument( + "--preferences-weight-match", + help="Prefered matching weights", + type=float, + default=constants.DEFAULT_PREFERENCES_WEIGHT_MATCH, + required=False, + ) + + gg_parser.add_argument( + "--objective-weights", + help="Objective weights for compatibility input csv file", + type=list, + default=constants.DEFAULT_OBJECTIVE_WEIGHTS, + required=False, + ) + + gg_parser.add_argument( + "--objective-measures", + help="Objective measures for compatibility input csv file: sum, avg, max, min, match, diff", + type=list, + default=constants.DEFAULT_OBJECTIVE_MEASURES, + required=False, + ) + gg_arguments_finished = gg_parser.parse_args(args) logging.basicConfig( diff --git a/gatorgrouper/utils/read_student_file.py b/gatorgrouper/utils/read_student_file.py index 93a1f2a7..b77d52fe 100644 --- a/gatorgrouper/utils/read_student_file.py +++ 
b/gatorgrouper/utils/read_student_file.py @@ -1,6 +1,7 @@ """ Reads CSV data file """ import csv +import re from pathlib import Path @@ -28,8 +29,10 @@ def read_csv_data(filepath): temp.append(True) elif value.lower() == "false": temp.append(False) - else: + elif re.match(r"^\d+?\.\d+?$", value) or value.isdigit(): temp.append(float(value)) + else: + temp.append(value) responses.append(temp) else: for record in csvdata: @@ -40,7 +43,9 @@ def read_csv_data(filepath): temp.append(True) elif value.lower() == "false": temp.append(False) - else: + elif re.match(r"^\d+?\.\d+?$", value) or value.isdigit(): temp.append(float(value)) + else: + temp.append(value) responses.append(temp) return responses diff --git a/gatorgrouper_cli.py b/gatorgrouper_cli.py index 3ba3d83a..607ebbc4 100644 --- a/gatorgrouper_cli.py +++ b/gatorgrouper_cli.py @@ -22,6 +22,7 @@ # read in the student identifiers from the specified file input_list = read_student_file.read_csv_data(GG_ARGUMENTS.file) + preference = dict(read_student_file.read_csv_data(GG_ARGUMENTS.preferences)) check_if_arguments_valid = parse_arguments.check_valid(GG_ARGUMENTS, input_list) if check_if_arguments_valid is False: print("Incorrect command-line arguments.") @@ -53,7 +54,13 @@ ) elif GG_ARGUMENTS.method == constants.ALGORITHM_GRAPH: GROUPED_STUDENT_IDENTIFIERS = group_graph.group_graph_partition( - SHUFFLED_STUDENT_IDENTIFIERS, GG_ARGUMENTS.num_group + SHUFFLED_STUDENT_IDENTIFIERS, + GG_ARGUMENTS.num_group, + preferences=preference, + preferences_weight=GG_ARGUMENTS.preferences_weight, + preferences_weight_match=GG_ARGUMENTS.preferences_weight_match, + objective_weights=GG_ARGUMENTS.objective_weights, + objective_measures=GG_ARGUMENTS.objective_measures, ) else: GROUPED_STUDENT_IDENTIFIERS = group_creation.group_random_num_group( diff --git a/tests/conftest.py b/tests/conftest.py index bd0b9e1e..82c37b2e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,12 +97,19 @@ def generate_csv_no_header(tmpdir_factory): 
def generate_csv_float(tmpdir_factory): """ Generate a tempory sample csv """ fn = tmpdir_factory.mktemp("data").join("csvNg.csv") - headers = ["NAME", "Q1", "Q2", "Q3", "Q4"] + headers = ["NAME", "Q1", "Q2", "Q3", "Q4", "Q5"] with open(str(fn), "w") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=headers) writer.writeheader() writer.writerow( - {"NAME": "delgrecoj", "Q1": "1.2", "Q2": "1.1", "Q3": "0.9", "Q4": "2.3"} + { + "NAME": "delgrecoj", + "Q1": "1.2", + "Q2": "1.1", + "Q3": "0.9", + "Q4": "2.3", + "Q5": "Name", + } ) return str(fn) @@ -113,8 +120,8 @@ def generate_csv_float_no_header(tmpdir_factory): fn = tmpdir_factory.mktemp("data").join("csvNg1.csv") data = [ # optionally include headers as the first entry - ["delgrecoj", "1.2", "0.7", "1.1", "0.2"], - ["delgrecoj2", "0.1", "0.5", "0.8", "0.6"], + ["delgrecoj", "1.2", "0.7", "1.1", "0.2", "Name"], + ["delgrecoj2", "0.1", "0.5", "0.8", "0.6", "Name"], ] csv_string = "" for entry in data: diff --git a/tests/test_group_graph.py b/tests/test_group_graph.py new file mode 100644 index 00000000..b8d9549e --- /dev/null +++ b/tests/test_group_graph.py @@ -0,0 +1,197 @@ +""" Test group graph algorithm""" +import itertools +from networkx import Graph +import pytest +from gatorgrouper.utils import group_graph + + +def test_recursive_kl_error(): + """ Test if ValueError is called if numgrp is not a power of 2 and not at least 2 """ + G = Graph() + with pytest.raises(ValueError) as excinfo: + group_graph.recursive_kl(G, numgrp=1) + exception_msg = excinfo.value.args[0] + assert exception_msg == "numgrp must be a power of 2 and at least 2." 
+ + +def test_recursive_kl_two(): + """ Test if recursive Kernighan-Lin algorithm returns two groups recursively """ + G = Graph() + student1 = (1, 4) + student2 = (2, 3) + G.add_edge(student1, student2) + actual_output = group_graph.recursive_kl(G, 2) + + assert actual_output in ([{student2}, {student1}], [{student1}, {student2}]) + + +def test_recursive_kl_multi(): + """ Test if recursive Kernighan-Lin algorithm returns more than two groups """ + G = Graph() + student1 = (1, 4) + student2 = (2, 3) + student3 = (5, 7) + student4 = (6, 8) + students = [student1, student2, student3, student4] + group_list = list(itertools.combinations(students, 2)) + G.add_edges_from(group_list) + actual_output = group_graph.recursive_kl(G, 2) + assert len(actual_output) == 2 + assert len(actual_output[0]) == 2 + assert len(actual_output[1]) == 2 + + +def test_total_cut_size(): + """ Test if cut size of subsets in partition match """ + G = Graph() + G.add_edges_from([(1, 4), (2, 3)]) + partition = [(1, 2), (3, 4)] + output = group_graph.total_cut_size(G, partition) + expected_output = 2 + assert output == expected_output + + +def test_compatibility_length(): + """ Test if exception message is raised by unequal students' scores """ + a = tuple([1.0]) + b = tuple([2.0, 0.5]) + with pytest.raises(Exception) as excinfo: + group_graph.compatibility(a, b) + exception_msg = excinfo.value.args[0] + assert exception_msg == "Tuples passed to compatibility() must have same size." 
+ + +def test_compatibility_measure_callable(): + """ Gives a callable measure to compatibility and tests if it is used """ + + def score(a, b): + return a + b + + a = [1, 1] + b = [0, 0.5] + output = group_graph.compatibility(a, b, objective_measures=[score, score]) + assert output == sum([1, 1.5]) + + +def test_compatibility_measure_preset(): + """ Test all preset measures """ + a = [1, 1] + b = [0, 0.5] + + output = group_graph.compatibility(a, b, objective_measures=["avg", "avg"]) + assert output == sum([0.5, 0.75]) + output = group_graph.compatibility(a, b, objective_measures=["max", "max"]) + assert output == sum([1, 1]) + output = group_graph.compatibility(a, b, objective_measures=["min", "min"]) + assert output == sum([0, 0.5]) + output = group_graph.compatibility(a, b, objective_measures=["match", "match"]) + assert output == sum([0, 0]) + output = group_graph.compatibility(a, b, objective_measures=["diff", "diff"]) + assert output == sum([1, 0.5]) + + +def test_compatibility_objective_weights(): + """ Test if objective_weights returns the objective weights of students """ + a = [1, 1] + b = [0, 0.5] + objective_weights = (2.0, 1.0) + output = group_graph.compatibility(a, b, objective_weights) + expected_output = 1.75 + assert output == expected_output + + +def test_compatibility_objective_measures(): + """ Test if objective_measures returns the objective measure of students """ + a = [0, 1] + b = [0.75, 0.75] + objective_measures = ("avg", "match") + output = group_graph.compatibility(a, b, objective_measures=objective_measures) + expected_output = 0.375 + assert output == expected_output + + +def test_compatibility_measure_average(): + """ Test if measure of different student scores return an average """ + a = [1, 1] + b = [0, 1] + output = group_graph.compatibility(a, b) + expected_output = 1.5 + assert output == expected_output + + +def test_compatibility_measure_max(): + """ Test if measure of different student scores return a maximum """ + a = [1, 0] 
+ b = [0, 0.5] + objective_measures = ("max", "max") + output = group_graph.compatibility(a, b, objective_measures=objective_measures) + expected_output = 1.5 + assert output == expected_output + + +def test_compatibility_measure_min(): + """ Test if measure of different student scores return a minimum """ + a = [1, 0] + b = [0, 0.5] + objective_measures = ("min", "min") + output = group_graph.compatibility(a, b, objective_measures=objective_measures) + expected_output = 0 + assert output == expected_output + + +def test_compatibility_measure_match(): + """ Test if measure of different student scores are both equal """ + a = [1, 0] + b = [1, 0.5] + objective_measures = ("match", "match") + output = group_graph.compatibility(a, b, objective_measures=objective_measures) + assert output == sum([1, 0]) + + +def test_compatibility_measure_diff(): + """ Test if measure of different student scores returns an absolute value difference """ + a = [1, 0] + b = [0, 0.5] + objective_measures = ("diff", "diff") + output = group_graph.compatibility(a, b, objective_measures=objective_measures) + expected_output = 1.5 + assert output == expected_output + + +def test_compatibility_measure_error(): + """ Test if wrong measure raises Exception error """ + a = tuple([1.0, 0.8]) + b = tuple([2.0, 0.5]) + with pytest.raises(Exception) as excinfo: + group_graph.compatibility(a, b, objective_measures=["su", "mu"]) + exception_msg = excinfo.value.args[0] + assert exception_msg == "Invalid measure" + + +def test_group_graph_partition(): + """ + Test for using recursive Kernighan-Lin algorithm that checks the output of + the group_graph_partition function with preferences as input + """ + students = [ + ["one", 0, 0], + ["two", 0, 0.5], + ["three", 0.5, 0], + ["four", 0.75, 0.75], + ["five", 0.8, 0.1], + ["six", 0, 1], + ["seven", 1, 0], + ["eight", 1, 1], + ] + preference = { + "one": {"seven", "five"}, + "two": {"three", "six"}, + "three": {"two", "four"}, + "four": {"four", "three"}, + 
"five": {"six", "one"}, + "six": {"five"}, + "seven": {"six"}, + "eight": {"seven"}, + } + output = group_graph.group_graph_partition(students, 4, preferences=preference) + assert len(output[0]) == 2 diff --git a/tests/test_read_student_file.py b/tests/test_read_student_file.py index 633647a8..33afbfe8 100644 --- a/tests/test_read_student_file.py +++ b/tests/test_read_student_file.py @@ -32,15 +32,15 @@ def test_read_student_file_no_header(generate_csv_no_header): def test_read_student_file_float(generate_csv_float): """ Test read_student_file """ - expectedoutput = [["delgrecoj", 1.2, 1.1, 0.9, 2.3]] + expectedoutput = [["delgrecoj", 1.2, 1.1, 0.9, 2.3, "Name"]] assert read_student_file.read_csv_data(generate_csv_float) == expectedoutput def test_read_student_file_no_header_float(generate_csv_float_no_header): """ Test read_student_file """ expectedoutput = [ - ["delgrecoj", 1.2, 0.7, 1.1, 0.2], - ["delgrecoj2", 0.1, 0.5, 0.8, 0.6], + ["delgrecoj", 1.2, 0.7, 1.1, 0.2, "Name"], + ["delgrecoj2", 0.1, 0.5, 0.8, 0.6, "Name"], ] assert ( read_student_file.read_csv_data(generate_csv_float_no_header) == expectedoutput