From b84cc2f377b950a97585bd4ec46313025672d67b Mon Sep 17 00:00:00 2001 From: jiwen xin Date: Tue, 12 May 2020 09:46:15 -0700 Subject: [PATCH 1/2] fix knowledge graph node id problem --- biothings_explorer/export/reasoner.py | 102 ++++++++++++++++---------- 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/biothings_explorer/export/reasoner.py b/biothings_explorer/export/reasoner.py index 48e87ab7..25a8bacb 100644 --- a/biothings_explorer/export/reasoner.py +++ b/biothings_explorer/export/reasoner.py @@ -10,9 +10,10 @@ from collections import defaultdict -class ReasonerConverter(): +class ReasonerConverter: """Convert the output of BTE to ReasonerAPIStd.""" + def __init__(self): self.result = defaultdict(list) @@ -23,8 +24,17 @@ def load_bte_query_path(self, start, intermediate, end): :param: intermediate : the intermediate nodes connecting input and output :param: end : the output of user query """ - self.path = [start.get('type')] + self.path = [start.get("type")] self.start = start + if ":" in self.start["primary"]["value"]: + self.start_node_curie = self.start["primary"]["value"] + else: + self.start_node_curie = ( + self.start["primary"]["identifier"] + + ":" + + self.start["primary"]["value"] + ) + if intermediate: if isinstance(intermediate, list): self.path += intermediate @@ -35,7 +45,7 @@ def load_bte_query_path(self, start, intermediate, end): elif isinstance(end, list): self.path.append(tuple(end)) elif isinstance(end, dict): - self.path.append(end.get('type')) + self.path.append(end.get("type")) def load_bte_output(self, G): """Load bte output in the format of networkx graph into class. @@ -55,7 +65,11 @@ def get_curie(self, node): node_info = self.nodes[node] if "identifier" in node_info: prefix = node_info["identifier"] - curie = prefix.upper() + ':' + node + # if the node id is already in curie format + # then no need to add prefix again + if ":" in node: + return node + curie = prefix + ":" + node return curie return node @@ -74,15 +88,17 @@ def fetch_edges(self): for k, v, o in self.G.edges(data=True): source_id = self.get_curie(k) target_id = self.get_curie(v) - edge_source = o['info'].get('$api') - _type = o.get('label') + edge_source = o["info"].get("$api") + _type = o.get("label") _id = self.hash_id(source_id + target_id + edge_source + _type) - edge = {"source_id": source_id, - "target_id": target_id, - "edge_source": edge_source, - "id": _id, - "type": _type} - self.result[source_id + '|||' + target_id].append(_id) + edge = { + "source_id": source_id, + "target_id": target_id, + "edge_source": edge_source, + "id": _id, + "type": _type, + } + self.result[source_id + "|||" + target_id].append(_id) edges.append(edge) return edges @@ -90,15 +106,17 @@ def fetch_nodes(self): """Reorganize the nodes into reasonerSTD format.""" nodes = [] for k, v in self.nodes: - name = v['equivalent_ids'].get("name") + name = v["equivalent_ids"].get("name") if name and isinstance(name, list): name = str(name[0]) else: name = str(self.get_curie(k)) - node = {"id": self.get_curie(k), - "name": name, - "type": v["type"], - "equivalent_identifiers": v['equivalent_ids']} + node = { + "id": self.get_curie(k), + "name": name, + "type": v["type"], + "equivalent_identifiers": v["equivalent_ids"], + } nodes.append(node) return nodes @@ -106,8 +124,7 @@ def generate_knowledge_graph(self): """Reorganize the nodes and edges into reasonerSTD format.""" if len(self.G) == 0: return {"nodes": [], "edges": []} - return {"nodes": self.fetch_nodes(), - "edges": self.fetch_edges()} + return {"nodes": self.fetch_nodes(), "edges": self.fetch_edges()} def generate_question_graph(self): if not self.path: @@ -122,51 +139,56 @@ def generate_question_graph(self): for i, node in enumerate(self.path): if isinstance(node, str): - nodes.append({"id": "n" + str(node_id), - "type": node}) - node2idmapping[str(i) + '-' + node + '-0'] = "n" + str(node_id) + nodes.append({"id": "n" + str(node_id), "type": node}) + node2idmapping[str(i) + "-" + node + "-0"] = "n" + str(node_id) node_id += 1 elif isinstance(node, tuple): for j, _n in enumerate(node): - nodes.append({"id": "n" + str(node_id), - "type": _n}) - node2idmapping[str(i) + '-' + _n + '-' + str(j)] = "n" + str(node_id) + nodes.append({"id": "n" + str(node_id), "type": _n}) + node2idmapping[str(i) + "-" + _n + "-" + str(j)] = "n" + str( + node_id + ) node_id += 1 - nodes[0]['curie'] = [self.start['primary']['identifier'] + ':' + self.start['primary']['value']] - + nodes[0]["curie"] = self.start_node_curie for i in range(0, len(self.path) - 1): source_node = self.path[i] - target_node = self.path[i+1] + target_node = self.path[i + 1] if isinstance(source_node, str): source_node = [source_node] if isinstance(target_node, str): target_node = [target_node] for p, _s in enumerate(source_node): for q, _t in enumerate(target_node): - source_id = node2idmapping[str(i) + '-' + _s + '-' + str(p)] - target_id = node2idmapping[str(i+1) + '-' + _t + '-' + str(q)] - edges.append({"id": "e" + str(edge_id), - "source_id": source_id, - "target_id": target_id, - "directed": True}) + source_id = node2idmapping[str(i) + "-" + _s + "-" + str(p)] + target_id = node2idmapping[str(i + 1) + "-" + _t + "-" + str(q)] + edges.append( + { + "id": "e" + str(edge_id), + "source_id": source_id, + "target_id": target_id, + "directed": True, + } + ) edge_id += 1 - return {"edges": edges, - "nodes": nodes} + return {"edges": edges, "nodes": nodes} def generate_result(self): result = {"node_bindings": [], "edge_bindings": []} if not self.result: return result + result["node_bindings"].append({"qg_id": "n0", "kg_id": self.start_node_curie}) for k, v in self.result.items(): - target_id = k.split('|||')[-1] + target_id = k.split("|||")[-1] result["node_bindings"].append({"qg_id": "n1", "kg_id": target_id}) result["edge_bindings"].append({"qg_id": "e1", "kg_id": v}) return result def generate_reasoner_response(self): """Generate reasoner response.""" - response = {"query_graph": self.generate_question_graph(), - "knowledge_graph": self.generate_knowledge_graph()} - response['results'] = self.generate_result() + response = { + "query_graph": self.generate_question_graph(), + "knowledge_graph": self.generate_knowledge_graph(), + } + response["results"] = self.generate_result() return response From a6b9ce309b19b733465362c4da9d2c61273d3bff Mon Sep 17 00:00:00 2001 From: jiwen xin Date: Tue, 12 May 2020 09:46:27 -0700 Subject: [PATCH 2/2] add test for bte2reasoner --- tests/test_bte2reasoner.py | 113 ++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 26 deletions(-) diff --git a/tests/test_bte2reasoner.py b/tests/test_bte2reasoner.py index 71a127db..1e9b5a3b 100644 --- a/tests/test_bte2reasoner.py +++ b/tests/test_bte2reasoner.py @@ -1,31 +1,92 @@ import unittest -from biothings_explorer.user_query_dispatcher import FindConnection +from biothings_explorer.user_query_dispatcher import ( + FindConnection, + SingleEdgeQueryDispatcher, +) from biothings_explorer.hint import Hint import requests import json -ht = Hint() -cxcr4 = ht.query('CXCR4')['Gene'][0] -fc = FindConnection(input_obj=cxcr4, output_obj='ChemicalSubstance', intermediate_nodes=None) -fc.connect(verbose=True) -response = fc.to_reasoner_std() - -class TestSingleHopQuery(unittest.TestCase): - - def test_result_section(self): - res = requests.post("http://transltr.io:7071/validate_result", - headers={"accept": "text/plain", "content-type": "application/json"}, - data=json.dumps(response["results"])).json() - self.assertEqual(res, "Successfully validated") - - def test_query_graph_section(self): - res = requests.post("http://transltr.io:7071/validate_querygraph", - headers={"accept": "text/plain", "content-type": "application/json"}, - data=json.dumps(response["query_graph"])).json() - self.assertEqual(res, "Successfully validated") - - def test_knowledge_graph_section(self): - res = requests.post("http://transltr.io:7071/validate_knowledgegraph", - headers={"accept": "text/plain", "content-type": "application/json"}, - data=json.dumps(response["knowledge_graph"])).json() - self.assertEqual(res, "Successfully validated") + +class TestFindConnection(unittest.TestCase): + @classmethod + def setUpClass(cls): + ht = Hint() + cxcr4 = ht.query("CXCR4")["Gene"][0] + fc = FindConnection( + input_obj=cxcr4, output_obj="ChemicalSubstance", intermediate_nodes=None + ) + fc.connect(verbose=True) + cls.response = fc.to_reasoner_std() + + def test_result_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_result", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["results"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + def test_query_graph_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_querygraph", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["query_graph"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + def test_knowledge_graph_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_knowledgegraph", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["knowledge_graph"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + +class TestSingleEdgeQuery(unittest.TestCase): + @classmethod + def setUpClass(cls): + seqd = SingleEdgeQueryDispatcher( + input_cls="ChemicalSubstance", + output_cls="Protein", + pred="", + input_id="CHEMBL.COMPOUND", + values="CHEMBL112", + ) + seqd.query() + cls.response = seqd.to_reasoner_std() + + def test_result_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_result", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["results"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + def test_query_graph_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_querygraph", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["query_graph"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + def test_knowledge_graph_section(cls): + res = requests.post( + "http://transltr.io:7071/validate_knowledgegraph", + headers={"accept": "text/plain", "content-type": "application/json"}, + data=json.dumps(cls.response["knowledge_graph"]), + ).json() + cls.assertEqual(res, "Successfully validated") + + def test_knowledge_graph_node_id(cls): + nodes = [item["id"] for item in cls.response["knowledge_graph"]["nodes"]] + cls.assertIn("CHEMBL.COMPOUND:CHEMBL112", nodes) + cls.assertIn("PR:000011298", nodes) + + def test_results_node_binding(cls): + nodes = [item["kg_id"] for item in cls.response["results"]["node_bindings"]] + cls.assertIn("CHEMBL.COMPOUND:CHEMBL112", nodes) + cls.assertIn("PR:000011298", nodes)