Skip to content

Commit

Permalink
Issue #424 Update openeo.processes to openeo-processes 2.0.0 rc1
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Nov 28, 2023
1 parent 4b66d8e commit 12c49d2
Show file tree
Hide file tree
Showing 10 changed files with 1,092 additions and 501 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Update autogenerated functions/methods in `openeo.processes` to definitions from openeo-processes project version 2.0.0-rc1.
This removes `create_raster_cube`, `fit_class_random_forest`, `fit_regr_random_forest` and `save_ml_model`.
Although removed from openeo-processes 2.0.0-rc1, support for `load_result`, `predict_random_forest` and `load_ml_model`
is preserved but deprecated. ([#424](https://github.com/Open-EO/openeo-python-client/issues/424))

### Removed

### Fixed
Expand Down
13 changes: 8 additions & 5 deletions openeo/internal/processes/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,17 @@ def _hanging_indent(self, paragraph: str, indent=" ", width: int = DEFAULT_WI


def collect_processes(sources: List[Union[Path, str]]) -> List[Process]:
    """
    Collect process definitions from a list of sources.

    :param sources: paths (or path strings) that are either directories
        containing process JSON files, or individual process JSON files.
    :return: list of :py:class:`Process` objects, sorted by process id.
    :raises Exception: when two sources define a process with the same id,
        to avoid one definition silently overwriting another.
    """
    # Keyed by process id so duplicate definitions can be detected.
    processes = {}
    for src in [Path(s) for s in sources]:
        if src.is_dir():
            to_add = parse_all_from_dir(src)
        else:
            to_add = [Process.from_json_file(src)]
        for p in to_add:
            if p.id in processes:
                raise Exception(f"Duplicate source for process {p.id!r}")
            processes[p.id] = p
    return sorted(processes.values(), key=lambda p: p.id)


def generate_process_py(processes: List[Process], output=sys.stdout, argv=None):
Expand Down
19 changes: 8 additions & 11 deletions openeo/internal/processes/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

import json
import typing
from pathlib import Path
from typing import Iterator, List, Union

Expand Down Expand Up @@ -66,19 +67,15 @@ def from_dict(cls, data: dict) -> Returns:
return cls(description=data["description"], schema=Schema.from_dict(data["schema"]))


class Process(typing.NamedTuple):
    """
    An openEO process definition: id, parameters, return spec and docs.

    Implemented as an (immutable) ``NamedTuple`` so instances get
    equality, ordering, ``repr`` and unpacking for free.
    """

    # Process identifier, e.g. "load_collection".
    id: str
    # Ordered list of process parameters.
    parameters: List[Parameter]
    # Description and schema of the process return value.
    returns: Returns
    # Long-form description (Markdown); optional.
    description: str = ""
    # One-line summary; optional.
    summary: str = ""
    # TODO: more properties?

@classmethod
def from_dict(cls, data: dict) -> Process:
Expand Down
1,231 changes: 748 additions & 483 deletions openeo/processes.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions specs/openeo-processes-legacy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Collection of process definitions of processes that were removed
from `openeo-processes` project, but should not yet be removed from
`openeo.processes`.
53 changes: 53 additions & 0 deletions specs/openeo-processes-legacy/load_ml_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"id": "load_ml_model",
"summary": "Load a ML model",
"description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.",
"categories": [
"machine learning",
"import"
],
"experimental": true,
"parameters": [
{
"name": "id",
"description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.",
"schema": [
{
"title": "URL",
"type": "string",
"format": "uri",
"subtype": "uri",
"pattern": "^https?://"
},
{
"title": "Batch Job ID",
"description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. Otherwise, you have to load a specific model from a batch job by URL.",
"type": "string",
"subtype": "job-id",
"pattern": "^[\\w\\-\\.~]+$"
},
{
"title": "User-uploaded File",
"type": "string",
"subtype": "file-path",
"pattern": "^[^\r\n\\:'\"]+$"
}
]
}
],
"returns": {
"description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://github.com/stac-extensions/ml-model",
"title": "STAC ml-model extension",
"type": "text/html",
"rel": "about"
}
]
}
212 changes: 212 additions & 0 deletions specs/openeo-processes-legacy/load_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
{
"id": "load_result",
"summary": "Load batch job results",
"description": "Loads batch job results and returns them as a processable data cube. A batch job result can be loaded by ID or URL:\n\n* **ID**: The identifier for a finished batch job. The job must have been submitted by the authenticated user on the back-end currently connected to.\n* **URL**: The URL to the STAC metadata for a batch job result. This is usually a signed URL that is provided by some back-ends since openEO API version 1.1.0 through the `canonical` link relation in the batch job result metadata.\n\nIf supported by the underlying metadata and file format, the data that is added to the data cube can be restricted with the parameters `spatial_extent`, `temporal_extent` and `bands`. If no data is available for the given extents, a `NoDataAvailable` exception is thrown.\n\n**Remarks:**\n\n* The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as specified in the metadata if the `bands` parameter is set to `null`.\n* If no additional parameter is specified this would imply that the whole data set is expected to be loaded. Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only load the data that is actually required after evaluating subsequent processes such as filters. This means that the values should be processed only after the data has been limited to the required extent and as a consequence also to a manageable size.",
"categories": [
"cubes",
"import"
],
"experimental": true,
"parameters": [
{
"name": "id",
"description": "The id of a batch job with results.",
"schema": [
{
"title": "ID",
"type": "string",
"subtype": "job-id",
"pattern": "^[\\w\\-\\.~]+$"
},
{
"title": "URL",
"type": "string",
"format": "uri",
"subtype": "uri",
"pattern": "^https?://"
}
]
},
{
"name": "spatial_extent",
"description": "Limits the data to load from the batch job result to the specified bounding box or polygons.\n\n* For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\n* For vector data, the process loads the geometry into the data cube of the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\n\nThe GeoJSON can be one of the following feature types:\n\n* A `Polygon` or `MultiPolygon` geometry,\n* a `Feature` with a `Polygon` or `MultiPolygon` geometry, or\n* a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries.\n\nSet this parameter to `null` to set no limit for the spatial extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.",
"schema": [
{
"title": "Bounding Box",
"type": "object",
"subtype": "bounding-box",
"required": [
"west",
"south",
"east",
"north"
],
"properties": {
"west": {
"description": "West (lower left corner, coordinate axis 1).",
"type": "number"
},
"south": {
"description": "South (lower left corner, coordinate axis 2).",
"type": "number"
},
"east": {
"description": "East (upper right corner, coordinate axis 1).",
"type": "number"
},
"north": {
"description": "North (upper right corner, coordinate axis 2).",
"type": "number"
},
"base": {
"description": "Base (optional, lower left corner, coordinate axis 3).",
"type": [
"number",
"null"
],
"default": null
},
"height": {
"description": "Height (optional, upper right corner, coordinate axis 3).",
"type": [
"number",
"null"
],
"default": null
},
"crs": {
"description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.",
"anyOf": [
{
"title": "EPSG Code",
"type": "integer",
"subtype": "epsg-code",
"minimum": 1000,
"examples": [
3857
]
},
{
"title": "WKT2",
"type": "string",
"subtype": "wkt2-definition"
}
],
"default": 4326
}
}
},
{
"title": "GeoJSON",
"description": "Limits the data cube to the bounding box of the given geometries. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`).\n\nThe GeoJSON type `GeometryCollection` is not supported. Empty geometries are ignored.",
"type": "object",
"subtype": "geojson"
},
{
"title": "Vector data cube",
"description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. All pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.",
"type": "object",
"subtype": "datacube",
"dimensions": [
{
"type": "geometry"
}
]
},
{
"title": "No filter",
"description": "Don't filter spatially. All data is included in the data cube.",
"type": "null"
}
],
"default": null,
"optional": true
},
{
"name": "temporal_extent",
"description": "Limits the data to load from the batch job result to the specified left-closed temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two elements:\n\n1. The first element is the start of the temporal interval. The specified instance in time is **included** in the interval.\n2. The second element is the end of the temporal interval. The specified instance in time is **excluded** from the interval.\n\nThe specified temporal strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the boundaries to `null`, but never both.\n\nSet this parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead of using ``filter_temporal()`` directly after loading unbounded data.",
"schema": [
{
"type": "array",
"subtype": "temporal-interval",
"minItems": 2,
"maxItems": 2,
"items": {
"anyOf": [
{
"type": "string",
"format": "date-time",
"subtype": "date-time"
},
{
"type": "string",
"format": "date",
"subtype": "date"
},
{
"type": "string",
"subtype": "year",
"minLength": 4,
"maxLength": 4,
"pattern": "^\\d{4}$"
},
{
"type": "null"
}
]
},
"examples": [
[
"2015-01-01T00:00:00Z",
"2016-01-01T00:00:00Z"
],
[
"2015-01-01",
"2016-01-01"
]
]
},
{
"title": "No filter",
"description": "Don't filter temporally. All data is included in the data cube.",
"type": "null"
}
],
"default": null,
"optional": true
},
{
"name": "bands",
"description": "Only adds the specified bands into the data cube so that bands that don't match the list of band names are not available. Applies to all dimensions of type `bands`.\n\nEither the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) can be specified. If the unique band name and the common name conflict, the unique band name has a higher priority.\n\nThe order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order.\n\nIt is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.",
"schema": [
{
"type": "array",
"minItems": 1,
"items": {
"type": "string",
"subtype": "band-name"
}
},
{
"title": "No filter",
"description": "Don't filter bands. All bands are included in the data cube.",
"type": "null"
}
],
"default": null,
"optional": true
}
],
"returns": {
"description": "A data cube for further processing.",
"schema": {
"type": "object",
"subtype": "datacube"
}
},
"exceptions": {
"NoDataAvailable": {
"message": "There is no data available for the given extents."
}
}
}
42 changes: 42 additions & 0 deletions specs/openeo-processes-legacy/predict_random_forest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"id": "predict_random_forest",
"summary": "Predict values based on a Random Forest model",
"description": "Applies a Random Forest machine learning model to an array and predict a value for it.",
"categories": [
"machine learning",
"reducer"
],
"experimental": true,
"parameters": [
{
"name": "data",
"description": "An array of numbers.",
"schema": {
"type": "array",
"items": {
"type": [
"number",
"null"
]
}
}
},
{
"name": "model",
"description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).",
"schema": {
"type": "object",
"subtype": "ml-model"
}
}
],
"returns": {
"description": "The predicted value. Returns `null` if any of the given values in the array is a no-data value.",
"schema": {
"type": [
"number",
"null"
]
}
}
}
Loading

0 comments on commit 12c49d2

Please sign in to comment.