Skip to content

Commit 0fab013

Browse files
Add streaming match generation (#34)
1 parent 80420ee commit 0fab013

File tree

1 file changed

+74
-37
lines changed

1 file changed

+74
-37
lines changed

grandcypher/__init__.py

+74-37
Original file line numberDiff line numberDiff line change
@@ -306,15 +306,15 @@ def _data_path_to_entity_name_attribute(data_path):
306306

307307

308308
class _GrandCypherTransformer(Transformer):
309-
def __init__(self, target_graph: nx.Graph):
309+
def __init__(self, target_graph: nx.Graph, limit=None):
310310
self._target_graph = target_graph
311311
self._where_condition: CONDITION = None
312312
self._motif = nx.DiGraph()
313313
self._matches = None
314314
self._matche_paths = None
315315
self._return_requests = []
316316
self._return_edges = {}
317-
self._limit = None
317+
self._limit = limit
318318
self._skip = 0
319319
self._max_hop = 100
320320

@@ -397,43 +397,22 @@ def returns(self, ignore_limit=False):
397397
return self._lookup(self._return_requests, offset_limit=offset_limit)
398398

399399
def _get_true_matches(self):
400-
# filter the matches based upon the conditions of the where clause:
401-
# TODO: promote these to inside the monomorphism search
402-
actual_matches = []
403-
for match, match_path in self._get_structural_matches():
404-
if not self._where_condition or self._where_condition(
405-
match, self._target_graph, self._return_edges
406-
):
407-
actual_matches.append((match, match_path))
408-
return actual_matches
409-
410-
def _get_structural_matches(self):
411400
if not self._matches:
412401
self_matches = []
413402
self_matche_paths = []
403+
complete = False
404+
414405
for my_motif, edge_hop_map in self._edge_hop_motifs(self._motif):
415-
matches = []
416-
for motif in (
417-
my_motif.subgraph(c)
418-
for c in nx.weakly_connected_components(my_motif)
419-
):
420-
_matches = grandiso.find_motifs(
421-
motif,
422-
self._target_graph,
423-
limit=(self._limit + self._skip + 1)
424-
if (self._skip and self._limit)
425-
else None,
426-
is_node_attr_match=_is_node_attr_match,
427-
is_edge_attr_match=_is_edge_attr_match,
428-
)
429-
if not matches:
430-
matches = _matches
431-
elif _matches:
432-
matches = [{**a, **b} for a in matches for b in _matches]
406+
# Iteration is complete
407+
if complete:
408+
break
409+
433410
zero_hop_edges = [
434411
k for k, v in edge_hop_map.items() if len(v) == 2 and v[0] == v[1]
435412
]
436-
for match in matches:
413+
414+
# Iterate over generated matches
415+
for match in self._matches_iter(my_motif):
437416
# matches can contains zero hop edges from A to B
438417
# there are 2 cases to take care
439418
# (1) there are both A and B in the match. This case is the result of query A -[*0]-> B --> C.
@@ -448,13 +427,70 @@ def _get_structural_matches(self):
448427
):
449428
break
450429
match[b] = match[a]
451-
else:
452-
self_matches.append(match)
453-
self_matche_paths.append(edge_hop_map)
430+
else: # For/else loop
431+
# Check if match matches where condition and add
432+
if not self._where_condition or self._where_condition(
433+
match, self._target_graph, self._return_edges
434+
):
435+
self_matches.append(match)
436+
self_matche_paths.append(edge_hop_map)
437+
438+
# Check if limit reached
439+
if self._is_limit(len(self_matches)):
440+
complete = True
441+
break
442+
454443
self._matches = self_matches
455444
self._matche_paths = self_matche_paths
445+
456446
return list(zip(self._matches, self._matche_paths))
457447

448+
def _matches_iter(self, motif):
    """
    Lazily yield structural matches of ``motif`` against the target graph.

    Every weakly connected component of ``motif`` is searched separately
    with ``grandiso.find_motifs_iter``. A motif with exactly one component
    is streamed straight through. Otherwise the per-component matches are
    combined with a cartesian join, merging the match dicts of each
    combination (later components win on key clashes).

    Arguments:
        motif (nx.DiGraph): The (hop-expanded) motif to search for

    Yields:
        dict: One motif-to-target match per result

    """
    # Local import keeps this block self-contained.
    from itertools import product

    # One lazy match iterator per weakly connected component.
    iterators = [
        grandiso.find_motifs_iter(
            motif.subgraph(c),
            self._target_graph,
            is_node_attr_match=_is_node_attr_match,
            is_edge_attr_match=_is_edge_attr_match,
        )
        for c in nx.weakly_connected_components(motif)
    ]

    # Single match clause: stream matches without buffering anything.
    if len(iterators) == 1:
        yield from iterators[0]
        return

    # Multi match clause: each component has to be materialised for the
    # join. Components without any match are ignored rather than emptying
    # the join (behaviour carried over from the previous implementation).
    # NOTE: the earlier per-component "limit reached" check was a no-op
    # (`continue` as the last statement of the loop), so no truncation
    # happens here either. Truncating before the caller applies the WHERE
    # condition could drop valid results.
    per_component = [found for found in map(list, iterators) if found]

    # Nothing matched at all (or the motif has no components): without
    # this guard, product() with no arguments would yield one empty match.
    if not per_component:
        return

    # Stream the cartesian product instead of building it up front, so a
    # caller that stops early never pays for the unused combinations.
    for combination in product(*per_component):
        merged = {}
        for partial in combination:
            merged.update(partial)
        yield merged
489+
490+
def _is_limit(self, count):
491+
# Check if limit reached
492+
return self._limit and count >= (self._limit + self._skip)
493+
458494
def _edge_hop_motifs(self, motif: nx.DiGraph) -> List[Tuple[nx.Graph, dict]]:
459495
"""generate a list of edge-hop-expanded motif with edge-hop-map.
460496
@@ -681,19 +717,20 @@ class GrandCypher:
681717
682718
"""
683719

684-
def __init__(self, host_graph: nx.Graph, limit: int = None) -> None:
    """
    Set up a GrandCypher instance for running Cypher queries on a graph.

    Arguments:
        host_graph (nx.Graph): The graph that acts as the "graph database"
        limit (int): The default limit to apply to queries when not
            otherwise provided

    Returns:
        None

    """
    # Keep a handle on the graph being queried.
    self._host_graph = host_graph

    # The transformer receives the same graph plus the default limit.
    self._transformer = _GrandCypherTransformer(host_graph, limit)
698735

699736
def run(self, cypher: str) -> Dict[str, List]:

0 commit comments

Comments
 (0)