Skip to content

Commit

Permalink
[FIX] dependency inference
Browse files Browse the repository at this point in the history
  • Loading branch information
remiadon committed Feb 8, 2021
1 parent 1af7fb9 commit f094136
Showing 1 changed file with 48 additions and 16 deletions.
64 changes: 48 additions & 16 deletions bids_prov/spm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import re
import os
from difflib import SequenceMatcher

from collections import defaultdict

Expand All @@ -12,11 +13,14 @@
PATH_REGEX = r"([A-Za-z]:|[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)*)((/[A-Za-z0-9_.-]+)+)"
PARAM_REGEX = r"[^\.]+\(\d+\)"
FILE_REGEX = r"(\.[a-z]{1,3}){1,2}"
DEPENDENCY_REGEX = r"""cfg_dep\(['"]([^'")]*)['"]\,.*""" # TODO : add ": " in match

get_id = lambda: "".join(random.choice(string.ascii_letters) for i in range(10))
has_parameter = lambda line: next(re.finditer(PARAM_REGEX, line), None) is not None
# has_entity = lambda line: not has_parameter(line) and next(re.finditer(PATH_REGEX, line), None) is not None

DEPENDENCY_DICT = dict(Segment="spatial.preproc")


def get_input_entity(left, right):
"""get input Entity if possible else return None
Expand All @@ -31,10 +35,11 @@ def get_input_entity(left, right):
if not next(re.finditer(PATH_REGEX, right), None):
return None

f = next(re.finditer(FILE_REGEX, right), None)
if f is None:
if next(re.finditer(FILE_REGEX, right), None) is None:
return None
entity_label = left.split("/")[-1].split(".")[0]

entity_label = re.sub(r"[{};\'\"]", "", right).split("/")[-1]
# import pdb; pdb.set_trace()
entity = {
"@id": "niiri:" + entity_label + get_id(),
"label": entity_label,
Expand Down Expand Up @@ -73,18 +78,16 @@ def group_lines(lines):
return new_res


def get_records(task_groups, records=defaultdict(list)):
def get_records(task_groups: dict, records=defaultdict(list)):
entities_ids = set()
for activity_name, values in task_groups.items():
activity_id = "niiri:" + activity_name + get_id()
activity = {
"@id": activity_id,
"label": "".join(activity_name),
"label": ".".join(activity_name.split(".")[-3:]),
"used": list(),
}
# TODO : add time to activity
# import pdb; pdb.set_trace()
used = list()
entities = []
input_entities, output_entities = list(), list()
params = []
for line in values:
split = line.split(" = ")
Expand All @@ -93,11 +96,39 @@ def get_records(task_groups, records=defaultdict(list)):
continue
left, right = split

entity = get_input_entity(left, right)
if entity:
entities.append(entity)
elif has_parameter(line):
pass
_in = get_input_entity(left, right)
if _in:
input_entities.append(_in)
elif has_parameter(left):
dependency = re.search(DEPENDENCY_REGEX, right, re.IGNORECASE)
if dependency is not None:
# import pdb; pdb.set_trace()
parts = dependency.group(1).split(": ")
to_match = "".join(
parts[:-1]
).lower() # TODO : str.lower before that
if to_match in DEPENDENCY_DICT:
to_match = DEPENDENCY_DICT[to_match]
closest_activity = max(
records["prov:Activity"],
key=lambda a: SequenceMatcher(
None, a["label"], to_match
).ratio(),
)
_id = "niiri:" + parts[-1].replace(" ", "") + get_id()
activity["used"].append(_id)
output_entities.append(
{
"@id": _id,
"label": parts[-1],
# "prov:atLocation": TODO
"wasGeneratedBy": closest_activity["@id"],
"attributedTo": "RRID:SCR_0070037",
}
)
pass
else:
Warning(f"Could not parse line {line}")
else:
param_name = ".".join(left.split(".")[-2:])
try:
Expand All @@ -107,8 +138,9 @@ def get_records(task_groups, records=defaultdict(list)):
except:
continue

if entities:
activity["used"] = [e["@id"] for e in entities]
if input_entities:
activity["used"] = [e["@id"] for e in input_entities]
entities = input_entities + output_entities
if params:
activity["attributes"] = params
records["prov:Activity"].append(activity)
Expand Down

0 comments on commit f094136

Please sign in to comment.