-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfp-tree.py
142 lines (109 loc) · 5.17 KB
/
fp-tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Tested on python=3.9
# -> dicts should hold insertion order (python>=3.6).
# "UniqueDotExporter" requires Graphviz program to be installed.
from anytree import NodeMixin, RenderTree
from anytree.exporter import UniqueDotExporter
class FpNode(NodeMixin):
"""Nodes of the fp-tree."""
def __init__(self, name, path_to_root:str=None, parent=None, link=None, count=1):
super().__init__()
self.count = count
self.name = name
self.parent = parent
self.path_to_root = path_to_root
self.link = link
class FpTree:
"""Generate Fp-tree for 10 items."""
def __init__(self):
self.tree_nodes = {}
self.last_nodes = {"0": FpNode("Head-0", "Head-0"), "1": FpNode("Head-1", "Head-1"),
"2": FpNode("Head-2", "Head-2"), "3": FpNode("Head-3", "Head-3"),
"4": FpNode("Head-4", "Head-4"), "5": FpNode("Head-5", "Head-5"),
"6": FpNode("Head-6", "Head-6"), "7": FpNode("Head-7", "Head-7"),
"8": FpNode("Head-8", "Head-8"), "9": FpNode("Head-9", "Head-9")}
def build(self, parent_node, further_items: dict, path_to_root: str = "R"):
"""Build the Fp-tree with nodes and save its structure in tree_nodes dict."""
cur_item = list(further_items)[0]
path_to_root += cur_item
if path_to_root not in self.tree_nodes:
cur_node = FpNode(cur_item, path_to_root, parent_node,
None, further_items[cur_item])
self.last_nodes[cur_item].link = cur_node
self.last_nodes[cur_item] = cur_node
self.tree_nodes[path_to_root] = cur_node
if len(further_items) != 1:
self.build(cur_node, dict(list(further_items.items())[1:]), path_to_root)
else:
cur_node = self.tree_nodes[path_to_root]
cur_node.count += 1
if len(further_items) != 1:
self.build(cur_node, dict(list(further_items.items())[1:]), path_to_root)
def print_nodes_on_terminal(self):
"""Print each node of the fp-tree on terminal."""
for node_path, node in self.tree_nodes.items():
if node.link:
link_p = node.link.path_to_root
else:
link_p = "None"
print(f"Path: {node_path} | Node: {node.name} | Parent: {node.parent.name} | "
f"link: {link_p} | Count: {node.count}")
def save_ascii_tree(self, root_node):
"""Save fp-tree structure graph in ASCII format."""
with open("fp-tree-ascii-graph.txt","w", encoding='utf8') as outfile:
for pre, _, node in RenderTree(root_node):
if node.name == "root":
treestr = f"{pre}{node.name}: {node.count}"
else:
if node.link:
link_p = node.link.path_to_root
else:
link_p = "None"
treestr = f"{pre}{node.name}: {node.count} (Link={link_p})"
outfile.write(treestr.ljust(8)+'\n')
print("-----------------------------------------\n"
"Tree saved in fp-tree-ascii-graph.txt")
def save_visual_tree(self, root_node):
"""Save fp-tree structure graph in visual format."""
UniqueDotExporter(
root_node,
nodeattrfunc=lambda node: "shape=box",
graph="graph",
edgetypefunc=lambda node,child: "--",
nodenamefunc=lambda n: f"'{n.name}': {n.count}\n({n.path_to_root})"
).to_picture("fp-tree-visual-graph.png")
print("-----------------------------------------\n"
"Tree saved in fp-tree-visual-graph.png")
def count_item_freq(input_file_name: str, min_sup: int) -> dict:
"""First scan: Create sorted items frequency with min sup"""
items = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 0, "5": 0, "6": 0, "7": 0, "8": 0, "9": 0}
with open(input_file_name,"r") as dataset:
for record in dataset:
for item in record.strip().split():
if int(item) in range(0, 10):
items[item] += 1
sorted_items = sorted(items.items(), key= lambda x: x[1], reverse=True)
return {k: v for k, v in sorted_items if v >= min_sup}
def main():
"""Initiate the tree and run the program"""
MIN_SUP = 1
dataset_file_name = "records.txt"
item_freq_list = count_item_freq(dataset_file_name, MIN_SUP) # First Scan
tree = FpTree()
root_node = FpNode("root")
freq = "".join(list(item_freq_list.keys()))
with open(dataset_file_name,"r") as dataset: # Second Scan
for record in dataset:
node_path = {}
record = record.strip().split()
for item in freq:
if item in record:
node_path[item] = record.count(item)
if len(node_path) != 0:
tree.build(root_node, node_path)
tree.print_nodes_on_terminal()
tree.save_ascii_tree(root_node)
tree.save_visual_tree(root_node)
print("-----------------------------------------")
print(f"Items frequent list (ordered) (MIN_SUP={MIN_SUP}):\n{item_freq_list}")
if __name__ == "__main__":
main()