From 6e848f48f33cf2ed5b6054431d645694cabf50c6 Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 00:11:45 +0100 Subject: [PATCH 1/8] Add support for handling cyclic graphs in BotParser --- utils/bot/parse_lib.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/utils/bot/parse_lib.py b/utils/bot/parse_lib.py index d4f8fc2..67f0f92 100644 --- a/utils/bot/parse_lib.py +++ b/utils/bot/parse_lib.py @@ -164,9 +164,51 @@ def get_dfg(self): end_activities.add("empty_intent") else: end_activities.add(node_id) + for node_id in start_activities: + # if the start_activity has an ingoing edge, we have a cyclic graph. + # In this case, each we should add a dummy end activity and + # connect each node that has an outgoing edge to a start activity to this dummy end activity instead + ingoing_edges = self.get_incoming_edges(node_id) + if (len(ingoing_edges) > 0): + if("empty_intent" not in end_activities): + end_activities.add("empty_intent") + for edge in ingoing_edges: + source_id = edge['source'] + dfg[(source_id, "empty_intent")] = 0 + if (source_id, node_id) in dfg: + dfg.pop((source_id, node_id)) return dfg, start_activities, end_activities + def get_outgoing_edges(self, node_id): + """ + Gets the outgoing edges of a node + :param node_id: the id of the node + :return: the outgoing edges of the node + :example: + >>> edges = get_outgoing_edges("n1") + """ + outgoing_edges = [] + for edge in self.edges.values(): + if edge['source'] == node_id: + outgoing_edges.append(edge) + return outgoing_edges + + def get_incoming_edges(self, node_id): + """ + Gets the incoming edges of a node + :param node_id: the id of the node + :return: the incoming edges of the node + + :example: + >>> edges = get_incoming_edges("n1") + """ + incoming_edges = [] + for edge in self.edges.values(): + if edge['target'] == node_id: + incoming_edges.append(edge) + return incoming_edges + def get_node_id_by_name(self, name): """ Gets the id of a node by its name From 392b8903d0f21dd159f4b66bcd61df5c29d4de8d Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 08:43:37 +0100 Subject: [PATCH 2/8] add option to not repair --- bot_blueprint.py | 2 +- enhancement/main.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bot_blueprint.py b/bot_blueprint.py index bdeb25c..3cbad98 100644 --- a/bot_blueprint.py +++ b/bot_blueprint.py @@ -67,7 +67,7 @@ def enhanced_bot_model(botName): try: bot_parser = get_parser(bot_model_json) bot_model_dfg, start_activities, end_activities, performance, frequency = enhance_bot_model( - event_log, bot_parser) + event_log, bot_parser,repair=True) if res_format == 'svg': gviz = dfg_visualizer.apply(bot_model_dfg) return gviz.pipe(format='svg').decode('utf-8') diff --git a/enhancement/main.py b/enhancement/main.py index 27c7371..b71faf6 100644 --- a/enhancement/main.py +++ b/enhancement/main.py @@ -12,7 +12,7 @@ bot_model_json_path = "./assets/models/test_bot_model.json" -def enhance_bot_model(event_log, bot_parser): +def enhance_bot_model(event_log, bot_parser,repair=False): """ Enhance the bot model using the event log. We assume that the bot model is incomplete @@ -26,7 +26,8 @@ def enhance_bot_model(event_log, bot_parser): """ dfg, start_activities, end_activities = bot_parser.get_dfg() # initial dfg net,im,fm = bot_parser.to_petri_net() - net,_,_ = repair_petri_net(event_log,net,im,fm) # repair the dfg + if repair == True: + net,_,_ = repair_petri_net(event_log,net,im,fm) # repair the dfg dfg = add_edge_frequency(event_log, dfg, start_activities, end_activities,bot_parser) # add the edge frequency performance = pm4py.discovery.discover_performance_dfg(event_log) From 5d8c84f5d354763e9df8eea90045f91870b1456b Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 10:07:58 +0100 Subject: [PATCH 3/8] add test file for alignment --- enhancement/assets/alignment-test.json | 532 +++++++++++++++++++++++++ enhancement/assets/test.xes | 21 + enhancement/test.py | 46 +++ 3 files changed, 599 insertions(+) create mode 100644 enhancement/assets/alignment-test.json create mode 100644 enhancement/assets/test.xes create mode 100644 enhancement/test.py diff --git a/enhancement/assets/alignment-test.json b/enhancement/assets/alignment-test.json new file mode 100644 index 0000000..66781b2 --- /dev/null +++ b/enhancement/assets/alignment-test.json @@ -0,0 +1,532 @@ +{ + "attributes": { + "label": { + "id": "modelAttributes[label]", + "name": "Label", + "value": { + "id": "modelAttributes[label]", + "name": "Label", + "value": "Model Attributes" + } + }, + "left": 0, + "top": 0, + "width": 50, + "height": 50, + "zIndex": 0, + "type": "ModelAttributesNode", + "attributes": {} + }, + "nodes": { + "c89ba2cf13956cf4ea761dad": { + "label": { + "id": "c89ba2cf13956cf4ea761dad[name]", + "name": "Name", + "value": { + "id": "c89ba2cf13956cf4ea761dad[name]", + "name": "Name", + "value": "Test" + } + }, + "left": 4515, + "top": 4300, + "width": 50, + "height": 50, + "zIndex": 16001, + "type": "Bot", + "attributes": { + "80f33463988c54141b8c4dd8": { + "id": "c89ba2cf13956cf4ea761dad[name]", + "name": "Name", + "value": { + "id": "c89ba2cf13956cf4ea761dad[name]", + "name": "Name", + "value": "Test" + } + } + } + }, + "872d4c08a5b2749ea4ee2b46": { + "label": { + "id": "872d4c08a5b2749ea4ee2b46[name]", + "name": "Name", + "value": { + "id": "872d4c08a5b2749ea4ee2b46[name]", + "name": "Name", + "value": "Test" + } + }, + "left": 4515, + "top": 4400, + "width": 50, + "height": 50, + "zIndex": 16002, + "type": "Messenger", + "attributes": { + "646c1466c4bf34e8267c4f26": { + "id": "872d4c08a5b2749ea4ee2b46[messenger type]", + "name": "Messenger Type", + "value": { + "id": "872d4c08a5b2749ea4ee2b46[messenger type]", + "name": "Messenger Type", + "value": "Rocket.Chat" + }, + "option": "Rocket.Chat" + }, + "fac7a2b850919e041f655919": { + "id": "872d4c08a5b2749ea4ee2b46[name]", + "name": "Name", + "value": { + "id": "872d4c08a5b2749ea4ee2b46[name]", + "name": "Name", + "value": "Test" + } + }, + "4d07dc2fa7f97c4ef5834078": { + "id": "872d4c08a5b2749ea4ee2b46[authentication token]", + "name": "Authentication Token", + "value": { + "id": "872d4c08a5b2749ea4ee2b46[authentication token]", + "name": "Authentication Token", + "value": "" + } + } + } + }, + "a": { + "label": { + "id": "fa62355c5be39b961f3698fb[label]", + "name": "Label", + "value": { + "id": "fa62355c5be39b961f3698fb[label]", + "name": "Label", + "value": "" + } + }, + "left": 4335, + "top": 4509, + "width": 50, + "height": 50, + "zIndex": 16003, + "type": "Incoming Message", + "attributes": { + "646c1466c4bf34e8267c4f27": { + "id": "fa62355c5be39b961f3698fb[type]", + "name": "Type", + "value": { + "id": "fa62355c5be39b961f3698fb[type]", + "name": "Type", + "value": "Text Message" + }, + "option": "Text Message" + }, + "5aefe0547e628a6019d3f613": { + "id": "fa62355c5be39b961f3698fb[followup message type]", + "name": "Followup Message Type", + "value": { + "id": "fa62355c5be39b961f3698fb[followup message type]", + "name": "Followup Message Type", + "value": "text" + }, + "option": "text" + }, + "869849f65db9be737e99bd24": { + "id": "fa62355c5be39b961f3698fb[intent keyword]", + "name": "Intent Keyword", + "value": { + "id": "fa62355c5be39b961f3698fb[intent keyword]", + "name": "Intent Keyword", + "value": "start" + } + }, + "e7d374a1e2d32c5e67bc68cc": { + "id": "fa62355c5be39b961f3698fb[intent label]", + "name": "Intent Label", + "value": { + "id": "fa62355c5be39b961f3698fb[intent label]", + "name": "Intent Label", + "value": "startConvo" + } + }, + "3e8bfec67da28c354a969b46": { + "id": "fa62355c5be39b961f3698fb[nlu id]", + "name": "NLU ID", + "value": { + "id": "fa62355c5be39b961f3698fb[nlu id]", + "name": "NLU ID", + "value": "" + } + }, + "8f8bee572ff26bedbfb46783": { + "id": "fa62355c5be39b961f3698fb[isfile]", + "name": "IsFile", + "value": { + "id": "fa62355c5be39b961f3698fb[isfile]", + "name": "IsFile", + "value": false + } + }, + "1ae4674949cfead101f6e8d8": { + "id": "fa62355c5be39b961f3698fb[fileurl]", + "name": "FileURL", + "value": { + "id": "fa62355c5be39b961f3698fb[fileurl]", + "name": "FileURL", + "value": "" + } + }, + "1b9710d3ac9dbdfd3b46110a": { + "id": "fa62355c5be39b961f3698fb[errormessage]", + "name": "ErrorMessage", + "value": { + "id": "fa62355c5be39b961f3698fb[errormessage]", + "name": "ErrorMessage", + "value": "" + } + }, + "90cd8b63ad44500486ddbb02": { + "id": "fa62355c5be39b961f3698fb[message]", + "name": "Message", + "value": { + "id": "fa62355c5be39b961f3698fb[message]", + "name": "Message", + "value": "{}" + } + } + } + }, + "a2": { + "label": { + "id": "4ca200efe3a84fb802e93332[label]", + "name": "Label", + "value": { + "id": "4ca200efe3a84fb802e93332[label]", + "name": "Label", + "value": "" + } + }, + "left": 4515, + "top": 4600, + "width": 50, + "height": 50, + "zIndex": 16004, + "type": "Incoming Message", + "attributes": { + "646c1466c4bf34e8267c4f27": { + "id": "4ca200efe3a84fb802e93332[type]", + "name": "Type", + "value": { + "id": "4ca200efe3a84fb802e93332[type]", + "name": "Type", + "value": "Text Message" + }, + "option": "Text Message" + }, + "5aefe0547e628a6019d3f613": { + "id": "4ca200efe3a84fb802e93332[followup message type]", + "name": "Followup Message Type", + "value": { + "id": "4ca200efe3a84fb802e93332[followup message type]", + "name": "Followup Message Type", + "value": "text" + }, + "option": "text" + }, + "869849f65db9be737e99bd24": { + "id": "4ca200efe3a84fb802e93332[intent keyword]", + "name": "Intent Keyword", + "value": { + "id": "4ca200efe3a84fb802e93332[intent keyword]", + "name": "Intent Keyword", + "value": "startConvo2" + } + }, + "e7d374a1e2d32c5e67bc68cc": { + "id": "4ca200efe3a84fb802e93332[intent label]", + "name": "Intent Label", + "value": { + "id": "4ca200efe3a84fb802e93332[intent label]", + "name": "Intent Label", + "value": "start2" + } + }, + "3e8bfec67da28c354a969b46": { + "id": "4ca200efe3a84fb802e93332[nlu id]", + "name": "NLU ID", + "value": { + "id": "4ca200efe3a84fb802e93332[nlu id]", + "name": "NLU ID", + "value": "" + } + }, + "8f8bee572ff26bedbfb46783": { + "id": "4ca200efe3a84fb802e93332[isfile]", + "name": "IsFile", + "value": { + "id": "4ca200efe3a84fb802e93332[isfile]", + "name": "IsFile", + "value": false + } + }, + "1ae4674949cfead101f6e8d8": { + "id": "4ca200efe3a84fb802e93332[fileurl]", + "name": "FileURL", + "value": { + "id": "4ca200efe3a84fb802e93332[fileurl]", + "name": "FileURL", + "value": "" + } + }, + "1b9710d3ac9dbdfd3b46110a": { + "id": "4ca200efe3a84fb802e93332[errormessage]", + "name": "ErrorMessage", + "value": { + "id": "4ca200efe3a84fb802e93332[errormessage]", + "name": "ErrorMessage", + "value": "" + } + }, + "90cd8b63ad44500486ddbb02": { + "id": "4ca200efe3a84fb802e93332[message]", + "name": "Message", + "value": { + "id": "4ca200efe3a84fb802e93332[message]", + "name": "Message", + "value": "{}" + } + } + } + }, + "e": { + "label": { + "id": "44dd9f282d2d1f4263edcbc4[label]", + "name": "Label", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[label]", + "name": "Label", + "value": "" + } + }, + "left": 4515, + "top": 4700, + "width": 50, + "height": 50, + "zIndex": 16005, + "type": "Incoming Message", + "attributes": { + "646c1466c4bf34e8267c4f27": { + "id": "44dd9f282d2d1f4263edcbc4[type]", + "name": "Type", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[type]", + "name": "Type", + "value": "Text Message" + }, + "option": "Text Message" + }, + "5aefe0547e628a6019d3f613": { + "id": "44dd9f282d2d1f4263edcbc4[followup message type]", + "name": "Followup Message Type", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[followup message type]", + "name": "Followup Message Type", + "value": "text" + }, + "option": "text" + }, + "869849f65db9be737e99bd24": { + "id": "44dd9f282d2d1f4263edcbc4[intent keyword]", + "name": "Intent Keyword", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[intent keyword]", + "name": "Intent Keyword", + "value": "end" + } + }, + "e7d374a1e2d32c5e67bc68cc": { + "id": "44dd9f282d2d1f4263edcbc4[intent label]", + "name": "Intent Label", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[intent label]", + "name": "Intent Label", + "value": "" + } + }, + "3e8bfec67da28c354a969b46": { + "id": "44dd9f282d2d1f4263edcbc4[nlu id]", + "name": "NLU ID", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[nlu id]", + "name": "NLU ID", + "value": "" + } + }, + "8f8bee572ff26bedbfb46783": { + "id": "44dd9f282d2d1f4263edcbc4[isfile]", + "name": "IsFile", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[isfile]", + "name": "IsFile", + "value": false + } + }, + "1ae4674949cfead101f6e8d8": { + "id": "44dd9f282d2d1f4263edcbc4[fileurl]", + "name": "FileURL", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[fileurl]", + "name": "FileURL", + "value": "" + } + }, + "1b9710d3ac9dbdfd3b46110a": { + "id": "44dd9f282d2d1f4263edcbc4[errormessage]", + "name": "ErrorMessage", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[errormessage]", + "name": "ErrorMessage", + "value": "" + } + }, + "90cd8b63ad44500486ddbb02": { + "id": "44dd9f282d2d1f4263edcbc4[message]", + "name": "Message", + "value": { + "id": "44dd9f282d2d1f4263edcbc4[message]", + "name": "Message", + "value": "{}" + } + } + } + }, + "261d47d76bd47f5b8f48196c": { + "label": { + "id": "261d47d76bd47f5b8f48196c[name]", + "name": "Name", + "value": { + "id": "261d47d76bd47f5b8f48196c[name]", + "name": "Name", + "value": "" + } + }, + "left": 4385.6953125, + "top": 4303.4609375, + "width": 50, + "height": 50, + "zIndex": 16006, + "type": "NLU Knowledge", + "attributes": { + "dbcf8b46f8cd76f445b9dea4": { + "id": "261d47d76bd47f5b8f48196c[name]", + "name": "Name", + "value": { + "id": "261d47d76bd47f5b8f48196c[name]", + "name": "Name", + "value": "" + } + }, + "c051de6af456e91e52c0c465": { + "id": "261d47d76bd47f5b8f48196c[url]", + "name": "URL", + "value": { + "id": "261d47d76bd47f5b8f48196c[url]", + "name": "URL", + "value": "" + } + } + } + } + }, + "edges": { + "292d23edf0f72fe399e4d8a5": { + "label": { + "id": "292d23edf0f72fe399e4d8a5[label]", + "name": "Label", + "value": { + "id": "292d23edf0f72fe399e4d8a5[label]", + "name": "Label", + "value": "" + } + }, + "source": "c89ba2cf13956cf4ea761dad", + "target": "872d4c08a5b2749ea4ee2b46", + "attributes": {}, + "type": "has" + }, + "594c8e479f912a9dc9e51982": { + "label": { + "id": "594c8e479f912a9dc9e51982[label]", + "name": "Label", + "value": { + "id": "594c8e479f912a9dc9e51982[label]", + "name": "Label", + "value": "" + } + }, + "source": "872d4c08a5b2749ea4ee2b46", + "target": "a", + "attributes": {}, + "type": "generates" + }, + "c593bae7b370cbc3e50dd0e8": { + "label": { + "id": "c593bae7b370cbc3e50dd0e8[label]", + "name": "Label", + "value": { + "id": "c593bae7b370cbc3e50dd0e8[label]", + "name": "Label", + "value": "longPath" + } + }, + "source": "a", + "target": "a2", + "attributes": {}, + "type": "leadsTo" + }, + "351bd9a22ad80c2a76975ac1": { + "label": { + "id": "351bd9a22ad80c2a76975ac1[label]", + "name": "Label", + "value": { + "id": "351bd9a22ad80c2a76975ac1[label]", + "name": "Label", + "value": "" + } + }, + "source": "872d4c08a5b2749ea4ee2b46", + "target": "a2", + "attributes": {}, + "type": "generates" + }, + "daf562d87e3f04e601e434a9": { + "label": { + "id": "daf562d87e3f04e601e434a9[label]", + "name": "Label", + "value": { + "id": "daf562d87e3f04e601e434a9[label]", + "name": "Label", + "value": "endConvo" + } + }, + "source": "a2", + "target": "e", + "attributes": {}, + "type": "leadsTo" + }, + "b114960d9cbece294c30bf5c": { + "label": { + "id": "b114960d9cbece294c30bf5c[label]", + "name": "Label", + "value": { + "id": "b114960d9cbece294c30bf5c[label]", + "name": "Label", + "value": "" + } + }, + "source": "c89ba2cf13956cf4ea761dad", + "target": "261d47d76bd47f5b8f48196c", + "attributes": {}, + "type": "has" + } + } +} \ No newline at end of file diff --git a/enhancement/assets/test.xes b/enhancement/assets/test.xes new file mode 100644 index 0000000..cce2cb3 --- /dev/null +++ b/enhancement/assets/test.xes @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/enhancement/test.py b/enhancement/test.py new file mode 100644 index 0000000..b350732 --- /dev/null +++ b/enhancement/test.py @@ -0,0 +1,46 @@ +import unittest +import json +import os +import pm4py +import os +import sys + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from enhancement.main import add_edge_frequency +from utils.bot.parse_lib import get_parser, BotParser + + + +def get_bot_model_json(rel_path): + current_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(current_dir, rel_path), 'r', encoding='UTF-8') as f: + return json.load(f) + + +def get_event_log(rel_path): + current_dir = os.path.dirname(os.path.abspath(__file__)) + return pm4py.read_xes(os.path.join(current_dir, rel_path)) + + +class TestBotParser(unittest.TestCase): + def setUp(self): + self.bot_model = None + self.instance = None + + def test_matching_alignment(self): + self.bot_model = get_bot_model_json('assets/alignment-test.json') + event_log = get_event_log('assets/test.xes') + self.instance = get_parser(self.bot_model) + self.assertIsInstance(self.instance, BotParser) + dfg, start,end = self.instance.get_dfg() + result_dfg = add_edge_frequency(event_log=event_log, bot_model_dfg=dfg,start_act=start,end_act=end,bot_parser=self.instance) + self.assertEqual(len(result_dfg.keys()), 2) + self.assertEqual(result_dfg[('a', 'a2')], 1) + self.assertEqual(result_dfg[('a2', 'e')], 1) + + + + +if __name__ == '__main__': + unittest.main() From 6ce6a28e36bb5e61fb7c9994203031eb07f94dc8 Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 10:08:52 +0100 Subject: [PATCH 4/8] fix cyclic --- utils/bot/parse_lib.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/utils/bot/parse_lib.py b/utils/bot/parse_lib.py index 67f0f92..0c16745 100644 --- a/utils/bot/parse_lib.py +++ b/utils/bot/parse_lib.py @@ -168,7 +168,8 @@ def get_dfg(self): # if the start_activity has an ingoing edge, we have a cyclic graph. # In this case, each we should add a dummy end activity and # connect each node that has an outgoing edge to a start activity to this dummy end activity instead - ingoing_edges = self.get_incoming_edges(node_id) + ingoing_edges = self.get_incoming_edges_that_are_end_activities( + node_id,end_activities) if (len(ingoing_edges) > 0): if("empty_intent" not in end_activities): end_activities.add("empty_intent") @@ -190,11 +191,11 @@ def get_outgoing_edges(self, node_id): """ outgoing_edges = [] for edge in self.edges.values(): - if edge['source'] == node_id: + if edge['source'] == node_id and edge['type'] in self.edge_types_of_interest: outgoing_edges.append(edge) return outgoing_edges - def get_incoming_edges(self, node_id): + def get_incoming_edges_that_are_end_activities(self, node_id,end_activities): """ Gets the incoming edges of a node :param node_id: the id of the node @@ -205,7 +206,9 @@ def get_incoming_edges(self, node_id): """ incoming_edges = [] for edge in self.edges.values(): - if edge['target'] == node_id: + if edge['source'] not in end_activities: + continue + if edge['target'] == node_id and edge['type'] in self.edge_types_of_interest and self.nodes[edge['source']]['type'] in self.node_types_of_interest: incoming_edges.append(edge) return incoming_edges From f3471a25e31517d2a8cb0050aa6979b05cf2a8d8 Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 11:01:10 +0100 Subject: [PATCH 5/8] update test log --- enhancement/assets/test.xes | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enhancement/assets/test.xes b/enhancement/assets/test.xes index cce2cb3..29ab49c 100644 --- a/enhancement/assets/test.xes +++ b/enhancement/assets/test.xes @@ -7,11 +7,11 @@ - + - + From 492c7d8ea33abc4de7b490f904818bf2876a1ae4 Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 11:05:43 +0100 Subject: [PATCH 6/8] update test for edge performance --- enhancement/test.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/enhancement/test.py b/enhancement/test.py index b350732..18acd33 100644 --- a/enhancement/test.py +++ b/enhancement/test.py @@ -7,7 +7,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from enhancement.main import add_edge_frequency +from enhancement.main import add_edge_frequency,add_edge_performance from utils.bot.parse_lib import get_parser, BotParser @@ -28,17 +28,28 @@ def setUp(self): self.bot_model = None self.instance = None - def test_matching_alignment(self): + def test_frequency_dfg(self): self.bot_model = get_bot_model_json('assets/alignment-test.json') event_log = get_event_log('assets/test.xes') self.instance = get_parser(self.bot_model) self.assertIsInstance(self.instance, BotParser) dfg, start,end = self.instance.get_dfg() - result_dfg = add_edge_frequency(event_log=event_log, bot_model_dfg=dfg,start_act=start,end_act=end,bot_parser=self.instance) + result_dfg = add_edge_frequency(event_log=event_log, dfg=dfg,start_act=start,end_act=end,bot_parser=self.instance) self.assertEqual(len(result_dfg.keys()), 2) self.assertEqual(result_dfg[('a', 'a2')], 1) self.assertEqual(result_dfg[('a2', 'e')], 1) + def test_performance_dfg(self): + self.bot_model = get_bot_model_json('assets/alignment-test.json') + event_log = get_event_log('assets/test.xes') + self.instance = get_parser(self.bot_model) + self.assertIsInstance(self.instance, BotParser) + dfg, start,end = self.instance.get_dfg() + result_dfg = add_edge_performance(event_log=event_log, dfg=dfg,start_act=start,end_act=end,bot_parser=self.instance) + self.assertEqual(len(result_dfg.keys()), 2) + self.assertEqual(result_dfg[('a', 'a2')], 65) + self.assertEqual(result_dfg[('a2', 'e')], 30) + From 288c23d7b50dad0344076a8fee7372053b6184c9 Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 11:50:15 +0100 Subject: [PATCH 7/8] performance --- bot_blueprint.py | 19 ++--- enhancement/main.py | 181 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 152 insertions(+), 48 deletions(-) diff --git a/bot_blueprint.py b/bot_blueprint.py index 3cbad98..d19cafa 100644 --- a/bot_blueprint.py +++ b/bot_blueprint.py @@ -25,13 +25,14 @@ def enhanced_bot_model(botName): }, 400 event_log_url = request.args['event-log-url'] res_format = request.args.get('format', 'json') - - if request.method == 'GET': - if 'bot-manager-url' not in request.args: + if 'bot-manager-url' not in request.args: return { "error": "bot-manager-url parameter is missing" }, 400 - bot_manager_url = request.args['bot-manager-url'] + bot_manager_url = request.args['bot-manager-url'] + + if request.method == 'GET': + try: bot_model_json = fetch_bot_model(botName, bot_manager_url) if bot_model_json is None: @@ -66,14 +67,14 @@ def enhanced_bot_model(botName): try: bot_parser = get_parser(bot_model_json) - bot_model_dfg, start_activities, end_activities, performance, frequency = enhance_bot_model( + bot_model_dfg, start_activities, end_activities, performance_dfg, frequency_dfg = enhance_bot_model( event_log, bot_parser,repair=True) if res_format == 'svg': gviz = dfg_visualizer.apply(bot_model_dfg) return gviz.pipe(format='svg').decode('utf-8') return serialize_response( - bot_model_dfg, bot_parser, start_activities, end_activities, performance, botName, frequency[0]) + bot_model_dfg, bot_parser, start_activities, end_activities, performance_dfg, botName, frequency_dfg) except Exception as e: print(e) return { @@ -305,7 +306,7 @@ def get_groups(botName): return fetchL2PGroups(contact_service_url, botName, current_app.default_bot_pw) -def serialize_response(bot_model_dfg, bot_parser, start_activities, end_activities, performance, botName, frequency_dfg): +def serialize_response(bot_model_dfg, bot_parser, start_activities, end_activities, performance_dfg, botName, frequency_dfg): added_edges = set() try: # serialize the bot model @@ -323,7 +324,7 @@ def serialize_response(bot_model_dfg, bot_parser, start_activities, end_activiti avg_confidence[keyword] = 0 else: avg_confidence[keyword] = row['averageConfidence'] - for edge, frequency in bot_model_dfg.items(): + for edge, _ in bot_model_dfg.items(): source_intent = bot_parser.id_name_map[edge[0] ] if edge[0] in bot_parser.id_name_map else None target_intent = bot_parser.id_name_map[edge[1] @@ -337,7 +338,7 @@ def serialize_response(bot_model_dfg, bot_parser, start_activities, end_activiti edges.append({ "source": edge[0], "target": edge[1], - "performance": performance[(source_label, target_label)] if (source_label, target_label) in performance else None, + "performance": performance_dfg[(source_label, target_label)] if (source_label, target_label) in performance_dfg else None, "frequency": frequency_dfg[(source_label, target_label)] if (source_label, target_label) in frequency_dfg else None, }) added_edges.add((edge[0], edge[1])) diff --git a/enhancement/main.py b/enhancement/main.py index b71faf6..8732ddc 100644 --- a/enhancement/main.py +++ b/enhancement/main.py @@ -3,6 +3,7 @@ import pandas as pd import itertools import uuid +import statistics import numpy as np from pm4py.statistics.traces.generic.log import case_statistics from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments_algorithm @@ -12,7 +13,7 @@ bot_model_json_path = "./assets/models/test_bot_model.json" -def enhance_bot_model(event_log, bot_parser,repair=False): +def enhance_bot_model(event_log, bot_parser, repair=False): """ Enhance the bot model using the event log. We assume that the bot model is incomplete @@ -25,18 +26,21 @@ def enhance_bot_model(event_log, bot_parser,repair=False): :return: enhanced bot model """ dfg, start_activities, end_activities = bot_parser.get_dfg() # initial dfg - net,im,fm = bot_parser.to_petri_net() + net, im, fm = bot_parser.to_petri_net() if repair == True: - net,_,_ = repair_petri_net(event_log,net,im,fm) # repair the dfg - dfg = add_edge_frequency(event_log, dfg, start_activities, - end_activities,bot_parser) # add the edge frequency - performance = pm4py.discovery.discover_performance_dfg(event_log) - frequency = pm4py.discovery.discover_dfg(event_log) - # replace NaN values with None - performance = __replace_nan_with_null(performance[0]) - return dfg, start_activities, end_activities, performance, frequency - -def repair_petri_net(event_log, net,im,fm): + net, _, _ = repair_petri_net(event_log, net, im, fm) # repair the dfg + frequency_dfg = add_edge_frequency(event_log, dfg, start_activities, + end_activities, bot_parser) # add the edge frequency + performance_dfg = add_edge_performance( + event_log, dfg, start_activities, end_act=end_activities,bot_parser=bot_parser) # add the edge performance + # performance = pm4py.discovery.discover_performance_dfg(event_log) + # frequency = pm4py.discovery.discover_dfg(event_log) + # replace NaN values with None + # performance = __replace_nan_with_null(performance[0]) + return dfg, start_activities, end_activities, frequency_dfg, performance_dfg + + +def repair_petri_net(event_log, net, im, fm): """ Repair the bot model using the event log. We assume that the bot model is incomplete @@ -48,15 +52,14 @@ def repair_petri_net(event_log, net,im,fm): :param bot_model_dfg: bot model as a DFG :return: enhanced bot model """ - net,_,_ = repair_process_model(net,im,fm,event_log) + net, _, _ = repair_process_model(net, im, fm, event_log) net = pm4py.reduce_petri_net_invisibles(net) - net,im,fm = pm4py.reduce_petri_net_implicit_places(net,im,fm) + net, im, fm = pm4py.reduce_petri_net_implicit_places(net, im, fm) # for some very weird reasons the repair function swaps the initial and final places. As a workaround we return the final marking as the initial marking and vice versa - return net,fm,im + return net, fm, im - -def add_edge_frequency(event_log, bot_model_dfg, start_act, end_act, bot_parser): +def add_edge_frequency(event_log, dfg, start_act, end_act, bot_parser): """ Add the edge frequency to the bot model :param event_log: event log @@ -66,15 +69,15 @@ def add_edge_frequency(event_log, bot_model_dfg, start_act, end_act, bot_parser) :param bot_parser: bot parser :return: bot model with edge frequency """ - - net, im, fm = bot_parser.to_petri_net(bot_model_dfg, start_act, end_act) + frequency_dfg = dfg.copy() + net, im, fm = bot_parser.to_petri_net(frequency_dfg, start_act, end_act) alignments_results = alignments_algorithm.apply(event_log, net, im, fm, { - Parameters.PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE: True}) + Parameters.PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE: True}) variants = pm4py.stats.get_variants_as_tuples(event_log) - new_nodes = dict() # nodes that are added to the bot model + new_nodes = dict() # nodes that are added to the bot model for alignment in list(diagnostic['alignment'] for diagnostic in alignments_results): log_trace = tuple(log_move[0] for model_move, - log_move in alignment if log_move[0] != ">>") # trace as it is in the log + log_move in alignment if log_move[0] != ">>") # trace as it is in the log # model_trace = tuple( # log_move[1] for model_move, log_move in alignment if model_move[1] != ">>") # for debugging log_trace_count = variants[log_trace] @@ -83,11 +86,11 @@ def add_edge_frequency(event_log, bot_model_dfg, start_act, end_act, bot_parser) for ((source, align_source), (target, align_target)) in itertools.pairwise(alignment): if (source[1] == ">>"): - if align_source[0] in new_nodes.keys(): + if align_source[0] in new_nodes.keys(): source_id = new_nodes[align_source[0]] else: source_id = str(uuid.uuid4()) - new_nodes[align_source[0]] = source_id + new_nodes[align_source[0]] = source_id bot_parser.id_name_map[source_id] = align_source[0] else: source_id = source[1].split("_")[0] @@ -101,18 +104,93 @@ def add_edge_frequency(event_log, bot_model_dfg, start_act, end_act, bot_parser) else: target_id = target[1].split("_")[0] + if (source_id, target_id) in frequency_dfg: + frequency_dfg[(source_id, target_id)] += log_trace_count + else: + potential_start_activities.add(source_id) + potential_end_activities.add(target_id) + frequency_dfg[(source_id, target_id)] = log_trace_count + + for potential_start_activity in potential_start_activities: + # check if the potential start activity has no incoming edge + has_incoming_edge = False + for _, target in frequency_dfg.keys(): + if target == potential_start_activity: + has_incoming_edge = True + break + if not has_incoming_edge and potential_start_activity not in start_act: + start_act.add(potential_start_activity) + for potential_end_activity in potential_end_activities: + # check if the potential end activity has no outgoing edge + has_outgoing_edge = False + for source, _ in frequency_dfg.keys(): + if source == potential_end_activity: + has_outgoing_edge = True + break + if not has_outgoing_edge and potential_end_activity not in end_act: + end_act.add(potential_end_activity) + return frequency_dfg + + +def add_edge_performance(event_log, dfg, start_act, end_act, bot_parser): + """ + Add the edge performance to the bot model + :param event_log: event log + :param bot_model_dfg: bot model as a DFG + :param start_act: start activities + :param end_act: end activities + :param bot_parser: bot parser + :return: bot model with edge frequency + """ + performance_dfg = dfg.copy() + net, im, fm = bot_parser.to_petri_net(performance_dfg, start_act, end_act) + alignments_results = alignments_algorithm.apply(event_log, net, im, fm, { + Parameters.PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE: True}) + variants = pm4py.stats.get_variants_as_tuples(event_log) + new_nodes = dict() # nodes that are added to the bot model + for alignment in list(diagnostic['alignment'] for diagnostic in alignments_results): + log_trace = tuple(log_move[0] for model_move, + log_move in alignment if log_move[0] != ">>") # trace as it is in the log + model_trace = tuple( + log_move[1] for model_move, log_move in alignment if model_move[1] != ">>") # for debugging + performance = _average_performance_of_trace( + log_trace, event_log, alignment) + potential_start_activities = set() + potential_end_activities = set() + + for ((source, align_source), (target, align_target)) in itertools.pairwise(alignment): + if (source[1] == ">>"): + if align_source[0] in new_nodes.keys(): + source_id = new_nodes[align_source[0]] + else: + source_id = str(uuid.uuid4()) + new_nodes[align_source[0]] = source_id + bot_parser.id_name_map[source_id] = align_source[0] + else: + source_id = source[1].split("_")[0] + if (target[1] == ">>"): + if align_target[0] in new_nodes.keys(): + target_id = new_nodes[align_target[0]] + else: + target_id = str(uuid.uuid4()) + new_nodes[align_target[0]] = target_id + bot_parser.id_name_map[target_id] = align_target[0] + else: + target_id = target[1].split("_")[0] - if (source_id, target_id) in bot_model_dfg: - bot_model_dfg[(source_id, target_id)] += log_trace_count + if (source_id, target_id) in performance_dfg and performance_dfg[(source_id, target_id)] != 0: + performance_dfg[(source_id, target_id)] = statistics.mean([ + performance_dfg[(source_id, target_id)], performance[(source_id, target_id)]]) if (source_id, target_id) in performance else performance_dfg[(source_id, target_id)] else: potential_start_activities.add(source_id) potential_end_activities.add(target_id) - bot_model_dfg[(source_id, target_id)] = log_trace_count - + performance_dfg[(source_id, target_id) + ] = performance[(source_id, target_id)] if (source_id, target_id) in performance else 0 + for potential_start_activity in potential_start_activities: # check if the potential start activity has no incoming edge has_incoming_edge = False - for _, target in bot_model_dfg.keys(): + for _, target in performance_dfg.keys(): if target == potential_start_activity: has_incoming_edge = True break @@ -121,13 +199,38 @@ def add_edge_frequency(event_log, bot_model_dfg, start_act, end_act, bot_parser) for potential_end_activity in potential_end_activities: # check if the potential end activity has no outgoing edge has_outgoing_edge = False - for source, _ in bot_model_dfg.keys(): + for source, _ in performance_dfg.keys(): if source == potential_end_activity: has_outgoing_edge = True break if not has_outgoing_edge and potential_end_activity not in end_act: end_act.add(potential_end_activity) - return bot_model_dfg + return performance_dfg + + +def _average_performance_of_trace(log_trace, event_log, alignment): + """ + gets the average performance of a trace in the event log. For each edge in the trace, we get the average duration + """ + cases = pm4py.filtering.filter_variants(event_log, [log_trace]) + durations = dict() + for case in cases.groupby("case:concept:name"): + case = case[1] + for i in range(0, len(case) - 1): + edge = (case.iloc[i]["concept:name"], + case.iloc[i + 1]["concept:name"]) + corresponding_model_ids = tuple(map(lambda x: x.split( + "_")[0], (alignment[i][0][1], alignment[i+1][0][1]))) + if corresponding_model_ids is None: + corresponding_model_ids = edge + if corresponding_model_ids in durations: + statistics.mean( + [durations[corresponding_model_ids], ( + (case.iloc[i + 1]["time:timestamp"] - case.iloc[i]["time:timestamp"]).total_seconds())]) + else: + durations[corresponding_model_ids] = ( + (case.iloc[i + 1]["time:timestamp"] - case.iloc[i]["time:timestamp"]).total_seconds()) + return durations def _find_trace_in_log(log_moves, log): @@ -228,7 +331,7 @@ def get_alignment_for_variant(variant, alignments_results): return None -# # debug +# # debug # import sys # import os @@ -239,7 +342,7 @@ def get_alignment_for_variant(variant, alignments_results): # from utils.bot.parse_lib import get_parser # from utils.api_requests import load_default_bot_model,get_default_event_log # if __name__ == "__main__": - + # event_log = get_default_event_log() # # test if time:timestamp is a datetime # # make time:timestamp to datetime @@ -254,8 +357,8 @@ def get_alignment_for_variant(variant, alignments_results): # def repair_bot_model(event_log, bot_parser, bot_model_dfg, start_activities, end_activities): # """ # Enhance the bot model using the event log. -# We assume that the bot model is incomplete -# as it does not contain subprocesses which are logged when +# We assume that the bot model is incomplete +# as it does not contain subprocesses which are logged when # the bot is communicating with an external service. # We say that the bot is in the service context in that case. # The event log contains the information whether we are in a service context as an additional attribute. @@ -264,7 +367,7 @@ def get_alignment_for_variant(variant, alignments_results): # :param bot_model_dfg: bot model as a DFG # :param start_activities: start activities # :param end_activities: end activities -# :return: enhanced bot model +# :return: enhanced bot model # """ # net, im, fm = bot_parser.to_petri_net(bot_model_dfg, start_activities, end_activities) # alignments = list(a['alignment'] for a in pm4py.conformance.conformance_diagnostics_alignments( @@ -296,15 +399,15 @@ def get_alignment_for_variant(variant, alignments_results): # bot_model_dfg[(tmp['id'], anchor['id'])] = 0 # anchor = None # tmp = None - + # if row['EVENT'] == "SERVICE_REQUEST": # anchor = {'name': row['concept:name'], 'id': bot_parser.get_node_id_by_name( # row['concept:name'])} # defines the (potential) start of a subprocess - + # tmp = anchor.copy() # if tmp!= None: # potential_end_activities.add(tmp['id']) - + # for potential_start_activity in potential_start_activities: # # check if the potential start activity has no incoming edge # has_incoming_edge = False From db09af3f9c87381f86a4c5bc6cef766c8827d81e Mon Sep 17 00:00:00 2001 From: lakhoune Date: Thu, 8 Feb 2024 12:00:47 +0100 Subject: [PATCH 8/8] fix stats --- bot_blueprint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot_blueprint.py b/bot_blueprint.py index d19cafa..b49fba5 100644 --- a/bot_blueprint.py +++ b/bot_blueprint.py @@ -338,8 +338,8 @@ def serialize_response(bot_model_dfg, bot_parser, start_activities, end_activiti edges.append({ "source": edge[0], "target": edge[1], - "performance": performance_dfg[(source_label, target_label)] if (source_label, target_label) in performance_dfg else None, - "frequency": frequency_dfg[(source_label, target_label)] if (source_label, target_label) in frequency_dfg else None, + "performance": performance_dfg[(edge[0], edge[1])] if (edge[0], edge[1]) in performance_dfg else None, + "frequency": frequency_dfg[(edge[0], edge[1])] if (edge[0], edge[1]) in frequency_dfg else None, }) added_edges.add((edge[0], edge[1]))