From 761b14701618d0690afac33e66ecb2d843320339 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 18 Nov 2021 15:59:59 +0100 Subject: [PATCH] New processes for random forest #295 --- meta/subtype-schemas.json | 6 +++ proposals/fit_regr_random_forest.json | 76 +++++++++++++++++++++++++++ proposals/predict_random_forest.json | 39 ++++++++++++++ tests/.words | 1 + 4 files changed, 122 insertions(+) create mode 100644 proposals/fit_regr_random_forest.json create mode 100644 proposals/predict_random_forest.json diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index b2a349bf..0bb33fe8 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -233,6 +233,12 @@ } } }, + "ml-model": { + "type": "object", + "subtype": "ml-model", + "title": "Machine Learning Model", + "description": "A machine learning model accompanied by STAC metadata, including the ml-model extension." + }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/fit_regr_random_forest.json b/proposals/fit_regr_random_forest.json new file mode 100644 index 00000000..d6946b99 --- /dev/null +++ b/proposals/fit_regr_random_forest.json @@ -0,0 +1,76 @@ +{ + "id": "fit_regr_random_forest", + "summary": "Train a random forest regression model", + "description": "Executes the fit of a random forest regression based on the user input of target and predictors. The Random Forest regression model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The input data for the regression model. The raster images that will be used as predictors for the Random Forest, aggregated to the features (vectors) of the target input variable.", + "schema": { + "type": "object", + "subtype": "raster-cube" + } + }, + { + "name": "target", + "description": "The input data for the regression model. This will be vector cubes for each training site. 
This is associated with the target variable for the Random Forest Model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "vector-cube" + } + }, + { + "name": "training", + "description": "The amount of training data to be used in the regression. The sampling will be done randomly across the data object. The remaining data will be used as test data for the validation.", + "schema": { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 100 + } + }, + { + "name": "num_trees", + "description": "The number of trees built within the Random Forest regression.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "mtry", + "description": "Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the number of predictors divided by 3.", + "optional": true, + "default": null, + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "null" + } + ] + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/proposals/predict_random_forest.json b/proposals/predict_random_forest.json new file mode 100644 index 00000000..90ccd6e6 --- /dev/null +++ b/proposals/predict_random_forest.json @@ -0,0 +1,39 @@ +{ + "id": "predict_random_forest", + "summary": "Predict values from a Random Forest model", + "description": "Applies a Random Forest Model to a raster data cube. The raster data cube necessarily needs the same bands as the predictors in the model. 
Otherwise, an `IncompatibleBands` exception must be thrown.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "A raster cube with the bands corresponding to the predictors.", + "schema": { + "type": "object", + "subtype": "raster-cube" + } + }, + { + "name": "model", + "description": "A model object that can be trained with the ``fit_regr_random_forest()`` process.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "A raster data cube with the prediction of the target variable based on the model.", + "schema": { + "type": "object", + "subtype": "raster-cube" + } + }, + "exceptions": { + "IncompatibleBands": { + "message": "The bands provided do not match the bands that the model has been trained for." + } + } +} \ No newline at end of file diff --git a/tests/.words b/tests/.words index b9fe6130..77cc432f 100644 --- a/tests/.words +++ b/tests/.words @@ -37,3 +37,4 @@ gdalwarp Lanczos sinc interpolants +Breiman