diff --git a/tests/test_absolutize.py b/tests/test_absolutize.py index 694b7df..c820e2d 100644 --- a/tests/test_absolutize.py +++ b/tests/test_absolutize.py @@ -17,24 +17,28 @@ def get_test_data_file(filename): return os.path.join(tests_dir, "test_data", filename) -class TestAbsolutizePaths: #(unittest.TestCase): +class TestAbsolutizePaths: # (unittest.TestCase): # NOTE: ids can not be found, like 634, or forbidden, like 678 @pytest.mark.parametrize("workflow_id", [41, 552]) def test_make_paths_absolute(self, workflow_id): - with patch_rdflib_urlopen(get_test_data_file): - with open(get_test_data_file(f"{workflow_id}_ro-crate-metadata.json"), "r") as f: + with open( + get_test_data_file(f"{workflow_id}_ro-crate-metadata.json"), "r" + ) as f: json_data = json.load(f) - assert not is_all_absolute(rdflib.Graph().parse(data=json.dumps(json_data), format="json-ld")) + assert not is_all_absolute( + rdflib.Graph().parse(data=json.dumps(json_data), format="json-ld") + ) json_data_abs_paths = make_paths_absolute(json_data, BASE_URL, 41) - G = rdflib.Graph().parse(data=json.dumps(json_data_abs_paths), format="json-ld") + G = rdflib.Graph().parse( + data=json.dumps(json_data_abs_paths), format="json-ld" + ) assert is_all_absolute(G) - def test_merged(self): G = merge_all_files("data/*21*.json") assert is_all_absolute(G) diff --git a/tests/test_source_crates.py b/tests/test_source_crates.py index 85a14d1..be874e7 100644 --- a/tests/test_source_crates.py +++ b/tests/test_source_crates.py @@ -141,4 +141,4 @@ def test_process_workflow_ids( with open(expected_file_path, "rb") as f: content = f.read() - assert content == b'{"name": "test"}' \ No newline at end of file + assert content == b'{"name": "test"}' diff --git a/workflowhub_graph/absolutize.py b/workflowhub_graph/absolutize.py index 333bdfd..33fc31d 100644 --- a/workflowhub_graph/absolutize.py +++ b/workflowhub_graph/absolutize.py @@ -14,8 +14,10 @@ def is_all_absolute(G: rdflib.Graph) -> bool: # we accept file:// with a netloc, even if netloc is not a FQDN, # see https://github.com/workflowhub-eu/workflowhub-graph/issues/1#issuecomment-2127351752 - if netloc == '': - print(f"found non-absolute path <{item}> {netloc}, {urlparse(item)}") + if netloc == "": + print( + f"found non-absolute path <{item}> {netloc}, {urlparse(item)}" + ) return False else: print("this path is absolute", item, urlparse(item)) diff --git a/workflowhub_graph/cachedurlopen.py b/workflowhub_graph/cachedurlopen.py index f539d70..9475e6c 100644 --- a/workflowhub_graph/cachedurlopen.py +++ b/workflowhub_graph/cachedurlopen.py @@ -1,21 +1,19 @@ - - from unittest.mock import patch, MagicMock from contextlib import contextmanager import io cached_urls = { - "https://w3id.org/ro/crate/1.0/context": "ro-crate-context-1.0.json", - "https://w3id.org/ro/crate/1.1/context": "ro-crate-context-1.1.json", - } + "https://w3id.org/ro/crate/1.0/context": "ro-crate-context-1.0.json", + "https://w3id.org/ro/crate/1.1/context": "ro-crate-context-1.1.json", +} @contextmanager -def patch_rdflib_urlopen(file_locator): +def patch_rdflib_urlopen(file_locator): def cached_urlopen(request): url = request.get_full_url() - + if url not in cached_urls: # TODO: store and use cache raise ValueError(f"URL {url} not in cache, have: {cached_urls.keys()}") @@ -26,14 +24,13 @@ class Response(io.StringIO): def info(self): return self.headers - + def geturl(self): return url - + content = open(file_locator(cached_urls[url]), "rt").read() return Response(content) - + with patch("rdflib.parser.urlopen", cached_urlopen): yield - diff --git a/workflowhub_graph/constants.py b/workflowhub_graph/constants.py index ae6fbc4..7a020eb 100644 --- a/workflowhub_graph/constants.py +++ b/workflowhub_graph/constants.py @@ -18,4 +18,3 @@ ZIP_ENDPOINT = "/workflows/{w_id}/ro_crate?version={w_version}" TARGET_FILE_NAME = "ro-crate-metadata.json" - diff --git a/workflowhub_graph/merge.py b/workflowhub_graph/merge.py index 036af5f..0de8d4f 100644 --- a/workflowhub_graph/merge.py +++ b/workflowhub_graph/merge.py @@ -1,4 +1,3 @@ - import argparse import glob import json @@ -29,17 +28,21 @@ def merge_all_files(pattern="data/*.json") -> rdflib.Graph: json_data = make_paths_absolute(json_data, BASE_URL, w_id) # TODO: make this actual caching, and pre-populate in the test - with patch_rdflib_urlopen(lambda x: "tests/test_data/ro-crate-context-1.0.json"): + with patch_rdflib_urlopen( + lambda x: "tests/test_data/ro-crate-context-1.0.json" + ): G.parse(data=json_data, format="json-ld") - + # TODO: set a total version - return G + return G if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("output_filename", help="The output filename.", default="merged.ttl") + argparser.add_argument( + "output_filename", help="The output filename.", default="merged.ttl" + ) args = argparser.parse_args() G = merge_all_files() - G.serialize(args.output_filename, format="ttl") \ No newline at end of file + G.serialize(args.output_filename, format="ttl")