From 445c1042b8377d8613f024a742382978e5cb3fcc Mon Sep 17 00:00:00 2001 From: claxn Date: Wed, 22 Apr 2020 10:31:43 +0200 Subject: [PATCH 1/3] fixed bug in callback edge node order; added example in README; added sort test --- README.md | 215 +++++++++++++++++++++++ README.rst | 45 ----- src/openeo_pg_parser_python/graph.py | 4 +- src/openeo_pg_parser_python/translate.py | 14 +- tests/test_graph.py | 19 ++ 5 files changed, 243 insertions(+), 54 deletions(-) create mode 100644 README.md delete mode 100644 README.rst create mode 100644 tests/test_graph.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..b5e210a --- /dev/null +++ b/README.md @@ -0,0 +1,215 @@ +# openeo-pg-parser-python + +This package allows to parse an *openEO* process graph (JSON) to a traversable Python object (`graph`), describing process dependencies and contents. + + +## Installation + +### Install miniconda and clone repository + +``` +wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +bash miniconda.sh -b -p $HOME/miniconda +export PATH="$HOME/miniconda/bin:$PATH" +git clone https://github.com/Open-EO/openeo-pg-parser-python.git +cd openeo-pg-parser-python + ``` + +### Create the conda environment + +``` +conda env create -f conda_environment.yml +``` + +### Install package in the conda environment + +``` +source activate openeo-pg-parser-python +python setup.py install +``` + +Change 'install' with 'develop' if you plan to further develop the package. + + +## Example + +Here, we show how an *openEO* process graph can be translated into a `graph` object. 
+An exemplary process graph is stored in a file named *"process_graph_example.json"* and is given below: +```json +{ + "s2a": { + "process_id": "load_collection", + "process_description": "Loading S2A data.", + "arguments": { + "id": "CGS_SENTINEL2_RADIOMETRY_V102_001", + "spatial_extent": { + "north": 48.40, + "south": 47.90, + "east": 16.84, + "west": 15.96 + }, + "temporal_extent": ["2017-09-05", "2017-10-01"] + } + }, + "ndvi": { + "process_id": "ndvi", + "process_description": "Calculate NDVI.", + "arguments": { + "data": {"from_node": "s2a"}, + "name": "ndvi" + } + }, + "min_time": { + "process_id": "reduce", + "process_description": "Take the minimum value in the time series.", + "arguments": { + "data": {"from_node": "ndvi"}, + "dimension": "temporal", + "reducer": { + "callback": { + "process_id": "min", + "process_description": "Calculate minimum", + "arguments": { + "data": {"from_argument": "data"} + }, + "result": true + } + } + } + }, + "output": { + "process_id": "save_result", + "description": "Save to disk", + "arguments": { + "data": {"from_node": "min_time"}, + "format": "Gtiff" + } + } +} +``` +To translate the JSON file into a python object, use: +```python +from openeo_pg_parser_python.translate import translate_process_graph + +pg_filepath = r"/eodc/private/tuwgeo/users/cnavacch/projects/openEO_pydevel/openeo-pg-parser-python/tests/process_graphs/test_1.json" +process_graph = translate_process_graph(pg_filepath) +``` +If you print the `graph` you get the information contained in each node: +``` +Node ID: s2a_0 +Node Name: s2a +{'arguments': {'id': 'CGS_SENTINEL2_RADIOMETRY_V102_001', + 'spatial_extent': {'east': 16.84, + 'north': 48.4, + 'south': 47.9, + 'west': 15.96}, + 'temporal_extent': ['2017-09-05', '2017-10-01']}, + 'process_description': 'Loading S2A data.', + 'process_id': 'load_collection'} + +Node ID: ndvi_1 +Node Name: ndvi +{'arguments': {'data': {'from_node': 's2a_0'}, 'name': 'ndvi'}, + 'process_description': 'Calculate NDVI.', 
+ 'process_id': 'ndvi'} + +Node ID: min_time_2 +Node Name: min_time +{'arguments': {'data': {'from_node': 'ndvi_1'}, + 'dimension': 'temporal', + 'reducer': {'from_node': 'callback_3'}}, + 'process_description': 'Take the minimum value in the time series.', + 'process_id': 'reduce'} + +Node ID: callback_3 +Node Name: callback +{'arguments': {'data': {'from_node': 'ndvi_1'}}, + 'process_description': 'Calculate minimum', + 'process_id': 'min', + 'result': True} + +Node ID: output_4 +Node Name: output +{'arguments': {'data': {'from_node': 'min_time_2'}, 'format': 'Gtiff'}, + 'description': 'Save to disk', + 'process_id': 'save_result'} +``` +It also possible to sort the process graph by the dependency of each node +with `sorted_process_graph = process_graph.sort(by='dependency')`: +``` +Node ID: s2a_0 +Node Name: s2a +{'arguments': {'id': 'CGS_SENTINEL2_RADIOMETRY_V102_001', + 'spatial_extent': {'east': 16.84, + 'north': 48.4, + 'south': 47.9, + 'west': 15.96}, + 'temporal_extent': ['2017-09-05', '2017-10-01']}, + 'process_description': 'Loading S2A data.', + 'process_id': 'load_collection'} + +Node ID: ndvi_1 +Node Name: ndvi +{'arguments': {'data': {'from_node': 's2a_0'}, 'name': 'ndvi'}, + 'process_description': 'Calculate NDVI.', + 'process_id': 'ndvi'} + +Node ID: callback_3 +Node Name: callback +{'arguments': {'data': {'from_node': 'ndvi_1'}}, + 'process_description': 'Calculate minimum', + 'process_id': 'min', + 'result': True} + +Node ID: min_time_2 +Node Name: min_time +{'arguments': {'data': {'from_node': 'ndvi_1'}, + 'dimension': 'temporal', + 'reducer': {'from_node': 'callback_3'}}, + 'process_description': 'Take the minimum value in the time series.', + 'process_id': 'reduce'} + +Node ID: output_4 +Node Name: output +{'arguments': {'data': {'from_node': 'min_time_2'}, 'format': 'Gtiff'}, + 'description': 'Save to disk', + 'process_id': 'save_result'} +``` +If you are interested in a specific node, you can use Python indexing: +```python 
+print(sorted_process_graph['min_time_2']) +``` +which results in: +``` +Node ID: min_time_2 +Node Name: min_time +{'arguments': {'data': {'from_node': 'ndvi_1'}, + 'dimension': 'temporal', + 'reducer': {'from_node': 'callback_3'}}, + 'process_description': 'Take the minimum value in the time series.', + 'process_id': 'reduce'} +``` +A node also offers access to its ancestors/parents/dependencies: +```python +print(sorted_process_graph['min_time_2'].dependencies) +``` + +``` +Node ID: ndvi_1 +Node Name: ndvi +{'arguments': {'data': {'from_node': 's2a_0'}, 'name': 'ndvi'}, + 'process_description': 'Calculate NDVI.', + 'process_id': 'ndvi'} + +Node ID: callback_3 +Node Name: callback +{'arguments': {'data': {'from_node': 'ndvi_1'}}, + 'process_description': 'Calculate minimum', + 'process_id': 'min', + 'result': True} +``` + +## Note + +This project has been set up using PyScaffold 3.1. For details and usage +information on PyScaffold see https://pyscaffold.org/. diff --git a/README.rst b/README.rst deleted file mode 100644 index 009813a..0000000 --- a/README.rst +++ /dev/null @@ -1,45 +0,0 @@ -======================= -openeo-pg-parser-python -======================= - - -The package parses an openEO process graph (JSON) to a traversable Python object, containing input and dependencies for each node. - - -Description -=========== - -1. Install miniconda and clone repository: ------------------------------------------- - -:: - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - git clone https://github.com/Open-EO/openeo-pg-parser-python.git - cd openeo-pg-parser-python - -2. Create the conda environment -------------------------------- - -:: - - conda env create -f conda_environment.yml - -3. 
Install package in the conda environment --------------------------------------------------------- - -:: - - source activate openeo-pg-parser-python - python setup.py install - -Change 'install' with 'develop' if you plan to further develop the package. - - -Note -==== - -This project has been set up using PyScaffold 3.1. For details and usage -information on PyScaffold see https://pyscaffold.org/. diff --git a/src/openeo_pg_parser_python/graph.py b/src/openeo_pg_parser_python/graph.py index e3612ae..4f3f1dd 100644 --- a/src/openeo_pg_parser_python/graph.py +++ b/src/openeo_pg_parser_python/graph.py @@ -441,10 +441,10 @@ def sort(self, by='dependency'): nodes_ordered = [] if by == "dependency": for node in self.nodes: - insert_idx = 0 + insert_idx = len(nodes_ordered) for node_dependency in node.dependencies: for idx, node_ordered in enumerate(nodes_ordered): - if (idx >= insert_idx) and (node_dependency.id == node_ordered.id): + if (idx <= insert_idx) and (node_dependency.id == node_ordered.id): insert_idx = idx + 1 # place the node after the dependency nodes_ordered.insert(insert_idx, node) else: diff --git a/src/openeo_pg_parser_python/translate.py b/src/openeo_pg_parser_python/translate.py index 03edf47..f656524 100644 --- a/src/openeo_pg_parser_python/translate.py +++ b/src/openeo_pg_parser_python/translate.py @@ -110,7 +110,7 @@ def walk_process_graph(process_graph, nodes, node_ids=None, level=0, prev_level= if node_ids: filtered_node_ids = [prev_node_id for prev_node_id in node_ids if prev_node_id] parent_node = nodes[filtered_node_ids[-1]] - edge_nodes = [parent_node, node] + edge_nodes = [node, parent_node] # for a callback the parent node comes after the node edge_id = "_".join([edge_node.id for edge_node in edge_nodes]) edge_name = "callback" edge = Edge(id=edge_id, name=edge_name, nodes=edge_nodes) @@ -302,7 +302,7 @@ def adjust_from_arguments(process_graph): for node in process_graph.nodes: keys_lineage = find_node_inputs(node.content, "from_argument") 
for key_lineage in keys_lineage: - nodes_lineage = process_graph.lineage(node, link="callback") + nodes_lineage = process_graph.lineage(node, link="callback", ancestors=False) # for callbacks the input lineage is inverted if nodes_lineage: root_node = nodes_lineage[-1] node_other = root_node.parent('data') @@ -335,12 +335,12 @@ def adjust_callbacks(process_graph): """ for node in process_graph.nodes: - node_descendants = node.descendants(link="callback") - if node_descendants: + node_ancestors = node.ancestors(link="callback") # for a callback the lineage is inverted, thus the ancestors + if node_ancestors: node_result = None - for node_descendant in node_descendants: - if ("result" in node_descendant.content.keys()) and node_descendant.content['result']: - node_result = node_descendant + for node_ancestor in node_ancestors: + if ("result" in node_ancestor.content.keys()) and node_ancestor.content['result']: + node_result = node_ancestor break if node_result: node.content = replace_callback(node.content, {'from_node': node_result.id}) diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..be285a7 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,19 @@ +import os +import unittest +from openeo_pg_parser_python.translate import translate_process_graph + +from tests import PG_FOLDER + + +def test_sort_process_graph(): + """ Tests sorting of a process graph. 
""" + + graph = translate_process_graph(os.path.join(PG_FOLDER, "test_1.json")) + assert list(graph.ids) == ["s2a_0", "ndvi_1", "min_time_2", "callback_3", "output_4"] + + sorted_graph = graph.sort(by='dependency') + assert list(sorted_graph.ids) == ["s2a_0", "ndvi_1", "callback_3", "min_time_2", "output_4"] + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From b24fa99da3f4523a7f509add8f4ade2157627c06 Mon Sep 17 00:00:00 2001 From: claxn Date: Wed, 22 Apr 2020 10:56:55 +0200 Subject: [PATCH 2/3] updated README --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5e210a..e0cce2a 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ To translate the JSON file into a python object, use: ```python from openeo_pg_parser_python.translate import translate_process_graph -pg_filepath = r"/eodc/private/tuwgeo/users/cnavacch/projects/openEO_pydevel/openeo-pg-parser-python/tests/process_graphs/test_1.json" +pg_filepath = "process_graph_example.json" process_graph = translate_process_graph(pg_filepath) ``` If you print the `graph` you get the information contained in each node: @@ -135,7 +135,10 @@ Node Name: output 'process_id': 'save_result'} ``` It also possible to sort the process graph by the dependency of each node -with `sorted_process_graph = process_graph.sort(by='dependency')`: +with: +```python +sorted_process_graph = process_graph.sort(by='dependency') +``` ``` Node ID: s2a_0 Node Name: s2a From e51a2826e9c41fbc9e20760c23f519aa70697fcb Mon Sep 17 00:00:00 2001 From: claxn Date: Wed, 22 Apr 2020 11:40:07 +0200 Subject: [PATCH 3/3] updated change log --- CHANGELOG.rst | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 226e6f5..1399e6e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,9 +2,22 @@ Changelog ========= -Version 0.1 -=========== +Version 1.0.0 +============= -- Feature A added -- FIX: 
nasty bug #1729 fixed -- add your changes here! +- Restructuring of graph classes and module setup. The following things changed in terms of the code: + - renamed `node.graph` to `node.content` + - all operations on a graph (dependencies, ancestors, lineage, ...) now return a subgraph + - a graph has two new properties: `ids` and `nodes`. `ids` are the node IDs and `nodes` the nodes. Both are views + - `nnodes` was removed and can be replaced by calling `len(graph)` + - new class method `from_list` converts a list of nodes to a graph + - `__getitem__` method in the graph class supports indexing by integer and node ID + - `get_node_by_name` method in the graph class returns the first node matching a given name + - `nodes_at_same_level` in the graph class was renamed and adapted to `find_siblings` (all nodes having the same parent) +- Additional tests + + +Version 0.0.1 +============= + +- First release for the openEO API 0.4