From 195f61b98ba03cd882ce1c8038c864e0c83b885f Mon Sep 17 00:00:00 2001 From: MartinuzziFrancesco Date: Wed, 13 Dec 2023 12:39:27 +0100 Subject: [PATCH] formatting and version bump --- Project.toml | 2 +- README.md | 54 +++++---- docs/make.jl | 15 ++- docs/pages.jl | 28 ++--- docs/src/api/esn.md | 7 +- docs/src/api/esn_drivers.md | 7 +- docs/src/api/esn_layers.md | 16 ++- docs/src/api/predict.md | 1 + docs/src/api/reca.md | 4 +- docs/src/api/states.md | 2 + docs/src/api/training.md | 5 +- docs/src/esn_tutorials/change_layers.md | 60 ++++++---- docs/src/esn_tutorials/deep_esn.md | 69 ++++++----- docs/src/esn_tutorials/different_drivers.md | 125 ++++++++++++-------- docs/src/esn_tutorials/hybrid.md | 64 +++++----- docs/src/esn_tutorials/lorenz_basic.md | 102 +++++++++------- docs/src/general/different_training.md | 15 ++- docs/src/general/predictive_generative.md | 5 +- docs/src/general/states_variation.md | 16 ++- docs/src/index.md | 43 +++++-- docs/src/reca_tutorials/reca.md | 13 +- src/esn/echostatenetwork.jl | 68 +++++------ src/esn/esn_input_layers.jl | 38 +++--- src/esn/esn_predict.jl | 66 +++++------ src/esn/esn_reservoir_drivers.jl | 54 ++++----- src/esn/esn_reservoirs.jl | 42 +++---- src/predict.jl | 20 ++-- src/reca/reca.jl | 18 +-- src/reca/reca_input_encodings.jl | 8 +- src/states.jl | 2 +- src/train/linear_regression.jl | 12 +- test/esn/test_drivers.jl | 48 ++++---- test/esn/test_hybrid.jl | 4 +- test/esn/test_nla.jl | 4 +- test/esn/test_reservoirs.jl | 6 +- test/esn/test_states.jl | 2 +- test/esn/test_train.jl | 2 +- test/reca/test_predictive.jl | 4 +- test/runtests.jl | 40 +++++-- test/test_states.jl | 4 +- 40 files changed, 626 insertions(+), 469 deletions(-) diff --git a/Project.toml b/Project.toml index 3d89691c..0efa988f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ReservoirComputing" uuid = "7c2d2b1e-3dd4-11ea-355a-8f6a8116e294" authors = ["Francesco Martinuzzi"] -version = "0.9.4" +version = "0.9.5" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" diff --git a/README.md b/README.md index 8a30afa8..9172725e 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ [![Join the chat at https://julialang.zulipchat.com #sciml-bridged](https://img.shields.io/static/v1?label=Zulip&message=chat&color=9558b2&labelColor=389826)](https://julialang.zulipchat.com/#narrow/stream/279055-sciml-bridged) [![Global Docs](https://img.shields.io/badge/docs-SciML-blue.svg)](https://docs.sciml.ai/ReservoirComputing/stable/) - [![arXiv](https://img.shields.io/badge/arXiv-2204.05117-00b300.svg)](https://arxiv.org/abs/2204.05117) +[![arXiv](https://img.shields.io/badge/arXiv-2204.05117-00b300.svg)](https://arxiv.org/abs/2204.05117) [![codecov](https://codecov.io/gh/SciML/ReservoirComputing.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/SciML/ReservoirComputing.jl) [![Build Status](https://github.com/SciML/ReservoirComputing.jl/workflows/CI/badge.svg)](https://github.com/SciML/ReservoirComputing.jl/actions?query=workflow%3ACI) [![Build status](https://badge.buildkite.com/db8f91b89a10ad79bbd1d9fdb1340e6f6602a1c0ed9496d4d0.svg)](https://buildkite.com/julialang/reservoircomputing-dot-jl) -[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) +[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet)](https://github.com/SciML/ColPrac) 
[![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle) ![rc_full_logo_large_white_cropped](https://user-images.githubusercontent.com/10376688/144242116-8243f58a-5ac6-4e0e-88d5-3409f00e20b4.png) @@ -23,58 +23,68 @@ To illustrate the workflow of this library we will showcase how it is possible t using ReservoirComputing, OrdinaryDiffEq #lorenz system parameters -u0 = [1.0,0.0,0.0] -tspan = (0.0,200.0) -p = [10.0,28.0,8/3] +u0 = [1.0, 0.0, 0.0] +tspan = (0.0, 200.0) +p = [10.0, 28.0, 8 / 3] #define lorenz system -function lorenz(du,u,p,t) - du[1] = p[1]*(u[2]-u[1]) - du[2] = u[1]*(p[2]-u[3]) - u[2] - du[3] = u[1]*u[2] - p[3]*u[3] +function lorenz(du, u, p, t) + du[1] = p[1] * (u[2] - u[1]) + du[2] = u[1] * (p[2] - u[3]) - u[2] + du[3] = u[1] * u[2] - p[3] * u[3] end #solve and take data -prob = ODEProblem(lorenz, u0, tspan, p) -data = solve(prob, ABM54(), dt=0.02) +prob = ODEProblem(lorenz, u0, tspan, p) +data = solve(prob, ABM54(), dt = 0.02) shift = 300 train_len = 5000 predict_len = 1250 #one step ahead for generative prediction -input_data = data[:, shift:shift+train_len-1] -target_data = data[:, shift+1:shift+train_len] +input_data = data[:, shift:(shift + train_len - 1)] +target_data = data[:, (shift + 1):(shift + train_len)] -test = data[:,shift+train_len:shift+train_len+predict_len-1] +test = data[:, (shift + train_len):(shift + train_len + predict_len - 1)] ``` + Now that we have the data we can initialize the ESN with the chosen parameters. Given that this is a quick example we are going to change the least amount of possible parameters. For more detailed examples and explanations of the functions please refer to the documentation. + ```julia res_size = 300 -esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, radius=1.2, sparsity=6/res_size), - input_layer = WeightedLayer(), - nla_type = NLAT2()) +esn = ESN(input_data; + reservoir = RandSparseReservoir(res_size, radius = 1.2, sparsity = 6 / res_size), + input_layer = WeightedLayer(), + nla_type = NLAT2()) ``` The echo state network can now be trained and tested. If not specified, the training will always be Ordinary Least Squares regression. The full range of training methods is detailed in the documentation. + ```julia output_layer = train(esn, target_data) output = esn(Generative(predict_len), output_layer) ``` The data is returned as a matrix, `output` in the code above, that contains the predicted trajectories. 
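A quick sanity check before plotting is to compare `output` with the held-out `test` matrix; a minimal sketch using only Base Julia (both are 3 × `predict_len` matrices here):

```julia
#rough accuracy check: mean squared error over the prediction window
mse = sum(abs2, output .- test) / length(test)
println("MSE of the generative prediction: ", mse)
```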
The results can now be easily plotted (for the actual script used to obtain this plot please refer to the documentation): + ```julia using Plots -plot(transpose(output),layout=(3,1), label="predicted") -plot!(transpose(test),layout=(3,1), label="actual") +plot(transpose(output), layout = (3, 1), label = "predicted") +plot!(transpose(test), layout = (3, 1), label = "actual") ``` + ![lorenz_basic](https://user-images.githubusercontent.com/10376688/166227371-8bffa318-5c49-401f-9c64-9c71980cb3f7.png) One can also visualize the phase space of the attractor and the comparison with the actual one: + ```julia -plot(transpose(output)[:,1], transpose(output)[:,2], transpose(output)[:,3], label="predicted") -plot!(transpose(test)[:,1], transpose(test)[:,2], transpose(test)[:,3], label="actual") +plot(transpose(output)[:, 1], + transpose(output)[:, 2], + transpose(output)[:, 3], + label = "predicted") +plot!(transpose(test)[:, 1], transpose(test)[:, 2], transpose(test)[:, 3], label = "actual") ``` + ![lorenz_attractor](https://user-images.githubusercontent.com/10376688/81470281-5a34b580-91ea-11ea-9eea-d2b266da19f4.png) ## Citing diff --git a/docs/make.jl b/docs/make.jl index 9fa9ec58..7d697952 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,13 +8,12 @@ ENV["GKSwstype"] = "100" include("pages.jl") makedocs(modules = [ReservoirComputing], - sitename = "ReservoirComputing.jl", - clean = true, doctest = false, linkcheck = true, - warnonly = [:missing_docs], - format = Documenter.HTML( - assets = ["assets/favicon.ico"], - canonical = "https://docs.sciml.ai/ReservoirComputing/stable/"), - pages = pages) + sitename = "ReservoirComputing.jl", + clean = true, doctest = false, linkcheck = true, + warnonly = [:missing_docs], + format = Documenter.HTML(assets = ["assets/favicon.ico"], + canonical = "https://docs.sciml.ai/ReservoirComputing/stable/"), + pages = pages) deploydocs(repo = "github.com/SciML/ReservoirComputing.jl.git"; - push_preview = true) + push_preview = true) diff --git a/docs/pages.jl b/docs/pages.jl index fb96f752..309f76fa 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -1,21 +1,21 @@ pages = [ "ReservoirComputing.jl" => "index.md", "General Settings" => Any["Changing Training Algorithms" => "general/different_training.md", - "Altering States" => "general/states_variation.md", - "Generative vs Predictive" => "general/predictive_generative.md"], + "Altering States" => "general/states_variation.md", + "Generative vs Predictive" => "general/predictive_generative.md"], "Echo State Network Tutorials" => Any["Lorenz System Forecasting" => "esn_tutorials/lorenz_basic.md", - #"Mackey-Glass Forecasting on GPU" => "esn_tutorials/mackeyglass_basic.md", - "Using Different Layers" => "esn_tutorials/change_layers.md", - "Using Different Reservoir Drivers" => "esn_tutorials/different_drivers.md", - #"Using Different Training Methods" => "esn_tutorials/different_training.md", - "Deep Echo State Networks" => "esn_tutorials/deep_esn.md", - "Hybrid Echo State Networks" => "esn_tutorials/hybrid.md"], + #"Mackey-Glass Forecasting on GPU" => "esn_tutorials/mackeyglass_basic.md", + "Using Different Layers" => "esn_tutorials/change_layers.md", + "Using Different Reservoir Drivers" => "esn_tutorials/different_drivers.md", + #"Using Different Training Methods" => "esn_tutorials/different_training.md", + "Deep Echo State Networks" => "esn_tutorials/deep_esn.md", + "Hybrid Echo State Networks" => "esn_tutorials/hybrid.md"], "Reservoir Computing with Cellular Automata" => "reca_tutorials/reca.md", "API Documentation" 
=> Any["Training Algorithms" => "api/training.md", - "States Modifications" => "api/states.md", - "Prediction Types" => "api/predict.md", - "Echo State Networks" => "api/esn.md", - "ESN Layers" => "api/esn_layers.md", - "ESN Drivers" => "api/esn_drivers.md", - "ReCA" => "api/reca.md"], + "States Modifications" => "api/states.md", + "Prediction Types" => "api/predict.md", + "Echo State Networks" => "api/esn.md", + "ESN Layers" => "api/esn_layers.md", + "ESN Drivers" => "api/esn_drivers.md", + "ReCA" => "api/reca.md"], ] diff --git a/docs/src/api/esn.md b/docs/src/api/esn.md index c7e5aa12..1caacdd1 100644 --- a/docs/src/api/esn.md +++ b/docs/src/api/esn.md @@ -1,4 +1,5 @@ # Echo State Networks + The core component of an ESN is the `ESN` type. It represents the entire Echo State Network and includes parameters for configuring the reservoir, input scaling, and output weights. Here's the documentation for the `ESN` type: ```@docs @@ -6,18 +7,22 @@ The core component of an ESN is the `ESN` type. It represents the entire Echo St ``` ## Variations + In addition to the standard `ESN` model, there are variations that allow for deeper customization of the underlying model. Currently, there are two available variations: `Default` and `Hybrid`. These variations provide different ways to configure the ESN. Here's the documentation for the variations: ```@docs Default Hybrid ``` + The `Hybrid` variation is the most complex option and offers additional customization. Note that more variations may be added in the future to provide even greater flexibility. ## Training To train an ESN model, you can use the `train` function. It takes the ESN model, training data, and other optional parameters as input and returns a trained model. Here's the documentation for the train function: + ```@docs train ``` -With these components and variations, you can configure and train ESN models for various time series and sequential data prediction tasks. \ No newline at end of file + +With these components and variations, you can configure and train ESN models for various time series and sequential data prediction tasks. diff --git a/docs/src/api/esn_drivers.md b/docs/src/api/esn_drivers.md index a11ec35b..0bf0a388 100644 --- a/docs/src/api/esn_drivers.md +++ b/docs/src/api/esn_drivers.md @@ -1,13 +1,16 @@ # ESN Drivers + ```@docs RNN MRNN GRU ``` -The ```GRU``` driver also provides the user with the choice of the possible variants: + +The `GRU` driver also provides the user with the choice of the possible variants: + ```@docs FullyGated Minimal ``` -Please refer to the original papers for more detail about these architectures. +Please refer to the original papers for more detail about these architectures. diff --git a/docs/src/api/esn_layers.md b/docs/src/api/esn_layers.md index 76be5268..bb3b53de 100644 --- a/docs/src/api/esn_layers.md +++ b/docs/src/api/esn_layers.md @@ -1,6 +1,7 @@ # ESN Layers ## Input Layers + ```@docs WeightedLayer DenseLayer @@ -9,16 +10,22 @@ MinimumLayer NullLayer ``` -The signs in the ```MinimumLayer``` are chosen based on the following methods: + +The signs in the `MinimumLayer` are chosen based on the following methods: + ```@docs BernoulliSample IrrationalSample ``` + To derive the matrix one can call the following function: + ```@docs create_layer ``` -To create new input layers, it suffices to define a new struct containing the needed parameters of the new input layer. This struct will need to be an ```AbstractLayer```, so the ```create_layer``` function can be dispatched over it. 
The workflow should follow this snippet: + +To create new input layers, it suffices to define a new struct containing the needed parameters of the new input layer. This struct will need to be an `AbstractLayer`, so the `create_layer` function can be dispatched over it. The workflow should follow this snippet: + ```julia #creation of the new struct for the layer struct MyNewLayer <: AbstractLayer @@ -32,6 +39,7 @@ end ``` ## Reservoirs + ```@docs RandSparseReservoir PseudoSVDReservoir @@ -43,11 +51,13 @@ end ``` Like for the input layers, to actually build the matrix of the reservoir, one can call the following function: + ```@docs create_reservoir ``` -To create a new reservoir, the procedure is similar to the one for the input layers. First, the definition of the new struct of type ```AbstractReservoir``` with the reservoir parameters is needed. Then the dispatch over the ```create_reservoir``` function makes the model actually build the reservoir matrix. An example of the workflow is given in the following snippet: +To create a new reservoir, the procedure is similar to the one for the input layers. First, the definition of the new struct of type `AbstractReservoir` with the reservoir parameters is needed. Then the dispatch over the `create_reservoir` function makes the model actually build the reservoir matrix. An example of the workflow is given in the following snippet: + ```julia #creation of the new struct for the reservoir struct MyNewReservoir <: AbstractReservoir diff --git a/docs/src/api/predict.md b/docs/src/api/predict.md index cf3699a4..5409d78e 100644 --- a/docs/src/api/predict.md +++ b/docs/src/api/predict.md @@ -1,4 +1,5 @@ # Prediction Types + ```@docs Generative Predictive diff --git a/docs/src/api/reca.md b/docs/src/api/reca.md index dcbc86df..48f134dd 100644 --- a/docs/src/api/reca.md +++ b/docs/src/api/reca.md @@ -1,11 +1,13 @@ # Reservoir Computing with Cellular Automata + ```@docs RECA ``` The input encodings are the equivalent of the input matrices of the ESNs. These are the available encodings: + ```@docs RandomMapping ``` -The training and prediction follow the same workflow as the ESN. It is important to note that currently we were unable to find any papers using these models with a ```Generative``` approach for the prediction, so full support is given only to the ```Predictive``` method. +The training and prediction follow the same workflow as the ESN. It is important to note that currently we were unable to find any papers using these models with a `Generative` approach for the prediction, so full support is given only to the `Predictive` method. diff --git a/docs/src/api/states.md b/docs/src/api/states.md index 282863e8..5a4e5686 100644 --- a/docs/src/api/states.md +++ b/docs/src/api/states.md @@ -1,6 +1,7 @@ # States Modifications ## Padding and Estension + ```@docs StandardStates ExtendedStates @@ -9,6 +10,7 @@ ``` ## Non Linear Transformations + ```@docs NLADefault NLAT1 diff --git a/docs/src/api/training.md b/docs/src/api/training.md index b34f046b..a11b2034 100644 --- a/docs/src/api/training.md +++ b/docs/src/api/training.md @@ -1,13 +1,16 @@ # Training Algorithms ## Linear Models + ```@docs StandardRidge LinearModel ``` ## Gaussian Regression + Currently, v0.9 is unavailable. ## Support Vector Regression -Support Vector Regression is possible using a direct call to [LIBSVM](https://github.com/JuliaML/LIBSVM.jl) regression methods. Instead of a wrapper, please refer to the use of ```LIBSVM.AbstractSVR``` in the original library. 
+ +Support Vector Regression is possible using a direct call to [LIBSVM](https://github.com/JuliaML/LIBSVM.jl) regression methods. Instead of a wrapper, please refer to the use of `LIBSVM.AbstractSVR` in the original library. diff --git a/docs/src/esn_tutorials/change_layers.md b/docs/src/esn_tutorials/change_layers.md index 0a659cd5..5cfb65cf 100644 --- a/docs/src/esn_tutorials/change_layers.md +++ b/docs/src/esn_tutorials/change_layers.md @@ -1,13 +1,16 @@ # Using Different Layers + A great deal of effort in the ESNs field is devoted to finding the ideal construction for the reservoir matrices. With a simple interface using ReservoirComputing.jl it is possible to leverage the currently implemented matrix construction methods for both the reservoir and the input layer. On this page, it is showcased how it is possible to change both of these layers. The `input_init` keyword argument provided with the `ESN` constructor allows for changing the input layer. The layers provided in ReservoirComputing.jl are the following: -- ```WeightedLayer(scaling)``` -- ```DenseLayer(scaling)``` -- ```SparseLayer(scaling, sparsity)``` -- ```MinimumLayer(weight, sampling)``` -- ```InformedLayer(model_in_size; scaling=0.1, gamma=0.5)``` -In addition, the user can define a custom layer following this workflow: + + - `WeightedLayer(scaling)` + - `DenseLayer(scaling)` + - `SparseLayer(scaling, sparsity)` + - `MinimumLayer(weight, sampling)` + - `InformedLayer(model_in_size; scaling=0.1, gamma=0.5)` + In addition, the user can define a custom layer following this workflow: + ```julia #creation of the new struct for the layer struct MyNewLayer <: AbstractLayer @@ -19,14 +22,17 @@ function create_layer(input_layer::MyNewLayer, res_size, in_size) #the new algorithm to build the input layer goes here end ``` + Similarly the `reservoir_init` keyword argument provides the possibility to change the construction for the reservoir matrix. The available reservoir are: -- ```RandSparseReservoir(res_size, radius, sparsity)``` -- ```PseudoSVDReservoir(res_size, max_value, sparsity, sorted, reverse_sort)``` -- ```DelayLineReservoir(res_size, weight)``` -- ```DelayLineBackwardReservoir(res_size, weight, fb_weight)``` -- ```SimpleCycleReservoir(res_size, weight)``` -- ```CycleJumpsReservoir(res_size, cycle_weight, jump_weight, jump_size)``` -And, like before, it is possible to build a custom reservoir by following this workflow: + + - `RandSparseReservoir(res_size, radius, sparsity)` + - `PseudoSVDReservoir(res_size, max_value, sparsity, sorted, reverse_sort)` + - `DelayLineReservoir(res_size, weight)` + - `DelayLineBackwardReservoir(res_size, weight, fb_weight)` + - `SimpleCycleReservoir(res_size, weight)` + - `CycleJumpsReservoir(res_size, cycle_weight, jump_weight, jump_size)` + And, like before, it is possible to build a custom reservoir by following this workflow: + ```julia #creation of the new struct for the reservoir struct MyNewReservoir <: AbstractReservoir @@ -40,9 +46,11 @@ end ``` ## Example of a minimally complex ESN + Using [^1] and [^2] as references, this section will provide an example of how to change both the input layer and the reservoir for ESNs. The full script for this example can be found [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/change_layers/layers.jl). This example was run on Julia v1.7.2. The task for this example will be the one step ahead prediction of the Henon map. 
To obtain the data, one can leverage the package [DynamicalSystems.jl](https://juliadynamics.github.io/DynamicalSystems.jl/dev/). The data is scaled to be between -1 and 1. + ```@example mesn using PredefinedDynamicalSystems train_len = 3000 @@ -51,13 +59,13 @@ predict_len = 2000 ds = PredefinedDynamicalSystems.henon() traj, time = trajectory(ds, 7000) data = Matrix(traj)' -data = (data .-0.5) .* 2 +data = (data .- 0.5) .* 2 shift = 200 -training_input = data[:, shift:shift+train_len-1] -training_target = data[:, shift+1:shift+train_len] -testing_input = data[:,shift+train_len:shift+train_len+predict_len-1] -testing_target = data[:,shift+train_len+1:shift+train_len+predict_len] +training_input = data[:, shift:(shift + train_len - 1)] +training_target = data[:, (shift + 1):(shift + train_len)] +testing_input = data[:, (shift + train_len):(shift + train_len + predict_len - 1)] +testing_target = data[:, (shift + train_len + 1):(shift + train_len + predict_len)] ``` Now it is possible to define the input layers and reservoirs we want to compare and run the comparison in a simple for loop. The accuracy will be tested using the mean squared deviation `msd` from [StatsBase](https://juliastats.org/StatsBase.jl/stable/). @@ -66,11 +74,14 @@ Now it is possible to define the input layers and reservoirs we want to compare using ReservoirComputing, StatsBase res_size = 300 -input_layer = [MinimumLayer(0.85, IrrationalSample()), MinimumLayer(0.95, IrrationalSample())] -reservoirs = [SimpleCycleReservoir(res_size, 0.7), - CycleJumpsReservoir(res_size, cycle_weight=0.7, jump_weight=0.2, jump_size=5)] +input_layer = [ + MinimumLayer(0.85, IrrationalSample()), + MinimumLayer(0.95, IrrationalSample()), +] +reservoirs = [SimpleCycleReservoir(res_size, 0.7), + CycleJumpsReservoir(res_size, cycle_weight = 0.7, jump_weight = 0.2, jump_size = 5)] -for i=1:length(reservoirs) +for i in 1:length(reservoirs) esn = ESN(training_input; input_layer = input_layer[i], reservoir = reservoirs[i]) @@ -79,11 +90,10 @@ for i=1:length(reservoirs) println(msd(testing_target, output)) end ``` -As it is possible to see, changing layers in ESN models is straightforward. Be sure to check the API documentation for a full list of reservoirs and layers. +As it is possible to see, changing layers in ESN models is straightforward. Be sure to check the API documentation for a full list of reservoirs and layers. ## Bibliography -[^1]: Rodan, Ali, and Peter Tiňo. “Simple deterministically constructed cycle reservoirs with regular jumps.” Neural computation 24.7 (2012): 1822-1852. +[^1]: Rodan, Ali, and Peter Tiňo. “Simple deterministically constructed cycle reservoirs with regular jumps.” Neural computation 24.7 (2012): 1822-1852. [^2]: Rodan, Ali, and Peter Tiňo. “Minimum complexity echo state network.” IEEE transactions on neural networks 22.1 (2010): 131-144. - diff --git a/docs/src/esn_tutorials/deep_esn.md b/docs/src/esn_tutorials/deep_esn.md index 6619f722..bd4f7b46 100644 --- a/docs/src/esn_tutorials/deep_esn.md +++ b/docs/src/esn_tutorials/deep_esn.md @@ -1,24 +1,26 @@ # Deep Echo State Networks -Deep Echo State Network architectures started to gain some traction recently. In this guide, we illustrate how it is possible to use ReservoirComputing.jl to build a deep ESN. +Deep Echo State Network architectures started to gain some traction recently. In this guide, we illustrate how it is possible to use ReservoirComputing.jl to build a deep ESN. The network implemented in this library is taken from [^1]. 
It works by stacking reservoirs on top of each other, feeding the output from one into the next. The states are obtained by merging all the inner states of the stacked reservoirs. For a more in-depth explanation, refer to the paper linked above. The full script for this example can be found [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/deep-esn/deepesn.jl). This example was run on Julia v1.7.2. ## Lorenz Example + For this example, we are going to reuse the Lorenz data used in the [Lorenz System Forecasting](@ref) example. + ```@example deep_lorenz using OrdinaryDiffEq #define lorenz system -function lorenz!(du,u,p,t) - du[1] = 10.0*(u[2]-u[1]) - du[2] = u[1]*(28.0-u[3]) - u[2] - du[3] = u[1]*u[2] - (8/3)*u[3] +function lorenz!(du, u, p, t) + du[1] = 10.0 * (u[2] - u[1]) + du[2] = u[1] * (28.0 - u[3]) - u[2] + du[3] = u[1] * u[2] - (8 / 3) * u[3] end #solve and take data -prob = ODEProblem(lorenz!, [1.0,0.0,0.0], (0.0,200.0)) -data = solve(prob, ABM54(), dt=0.02) +prob = ODEProblem(lorenz!, [1.0, 0.0, 0.0], (0.0, 200.0)) +data = solve(prob, ABM54(), dt = 0.02) #determine shift length, training length and prediction length shift = 300 @@ -26,22 +28,23 @@ train_len = 5000 predict_len = 1250 #split the data accordingly -input_data = data[:, shift:shift+train_len-1] -target_data = data[:, shift+1:shift+train_len] -test_data = data[:,shift+train_len+1:shift+train_len+predict_len] +input_data = data[:, shift:(shift + train_len - 1)] +target_data = data[:, (shift + 1):(shift + train_len)] +test_data = data[:, (shift + train_len + 1):(shift + train_len + predict_len)] ``` -Again, it is *important* to notice that the data needs to be formatted in a matrix, with the features as rows and time steps as columns, as in this example. This is needed even if the time series consists of single values. +Again, it is *important* to notice that the data needs to be formatted in a matrix, with the features as rows and time steps as columns, as in this example. This is needed even if the time series consists of single values. + +The construction of the ESN is also really similar. The only difference is that the reservoir can be fed as an array of reservoirs. -The construction of the ESN is also really similar. The only difference is that the reservoir can be fed as an array of reservoirs. ```@example deep_lorenz using ReservoirComputing -reservoirs = [RandSparseReservoir(99, radius=1.1, sparsity=0.1), - RandSparseReservoir(100, radius=1.2, sparsity=0.1), - RandSparseReservoir(200, radius=1.4, sparsity=0.1)] +reservoirs = [RandSparseReservoir(99, radius = 1.1, sparsity = 0.1), + RandSparseReservoir(100, radius = 1.2, sparsity = 0.1), + RandSparseReservoir(200, radius = 1.4, sparsity = 0.1)] -esn = ESN(input_data; +esn = ESN(input_data; variation = Default(), reservoir = reservoirs, input_layer = DenseLayer(), @@ -55,35 +58,39 @@ As it is possible to see, different sizes can be chosen for the different reserv In addition to using the provided functions for the construction of the layers, the user can also choose to build their own matrix, or array of matrices, and feed that into the `ESN` in the same way. 
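For example, a minimal sketch of that second option, passing hand-built matrices directly, could look as follows (the random matrices below are purely illustrative and are not rescaled to a target spectral radius, which a usable reservoir would normally require):

```julia
#illustrative only: hand-built reservoir matrices fed straight to the ESN constructor
custom_reservoirs = [0.1 .* (rand(100, 100) .- 0.5),
    0.1 .* (rand(150, 150) .- 0.5),
    0.1 .* (rand(200, 200) .- 0.5)]

esn_custom = ESN(input_data;
    reservoir = custom_reservoirs,
    input_layer = DenseLayer())
```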
The training and prediction follow the usual framework: + ```@example deep_lorenz -training_method = StandardRidge(0.0) +training_method = StandardRidge(0.0) output_layer = train(esn, target_data, training_method) output = esn(Generative(predict_len), output_layer) ``` + Plotting the results: + ```@example deep_lorenz using Plots ts = 0.0:0.02:200.0 lorenz_maxlyap = 0.9056 -predict_ts = ts[shift+train_len+1:shift+train_len+predict_len] -lyap_time = (predict_ts .- predict_ts[1])*(1/lorenz_maxlyap) - -p1 = plot(lyap_time, [test_data[1,:] output[1,:]], label = ["actual" "predicted"], - ylabel = "x(t)", linewidth=2.5, xticks=false, yticks = -15:15:15); -p2 = plot(lyap_time, [test_data[2,:] output[2,:]], label = ["actual" "predicted"], - ylabel = "y(t)", linewidth=2.5, xticks=false, yticks = -20:20:20); -p3 = plot(lyap_time, [test_data[3,:] output[3,:]], label = ["actual" "predicted"], - ylabel = "z(t)", linewidth=2.5, xlabel = "max(λ)*t", yticks = 10:15:40); - - -plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", - layout=(3,1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize=15, yguidefontsize=15, - legendfontsize=12, titlefontsize=20) +predict_ts = ts[(shift + train_len + 1):(shift + train_len + predict_len)] +lyap_time = (predict_ts .- predict_ts[1]) * (1 / lorenz_maxlyap) + +p1 = plot(lyap_time, [test_data[1, :] output[1, :]], label = ["actual" "predicted"], + ylabel = "x(t)", linewidth = 2.5, xticks = false, yticks = -15:15:15); +p2 = plot(lyap_time, [test_data[2, :] output[2, :]], label = ["actual" "predicted"], + ylabel = "y(t)", linewidth = 2.5, xticks = false, yticks = -20:20:20); +p3 = plot(lyap_time, [test_data[3, :] output[3, :]], label = ["actual" "predicted"], + ylabel = "z(t)", linewidth = 2.5, xlabel = "max(λ)*t", yticks = 10:15:40); + +plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", + layout = (3, 1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize = 15, + yguidefontsize = 15, + legendfontsize = 12, titlefontsize = 20) ``` Note that there is a known bug at the moment with using `WeightedLayer` as the input layer with the deep ESN. We are in the process of investigating and solving it. The leak coefficient for the reservoirs has to always be the same in the current implementation. This is also something we are actively looking into expanding. ## Documentation + [^1]: Gallicchio, Claudio, and Alessio Micheli. "_Deep echo state network (deepesn): A brief survey._" arXiv preprint arXiv:1712.04323 (2017). diff --git a/docs/src/esn_tutorials/different_drivers.md b/docs/src/esn_tutorials/different_drivers.md index 9b9fcd55..0a669451 100644 --- a/docs/src/esn_tutorials/different_drivers.md +++ b/docs/src/esn_tutorials/different_drivers.md @@ -1,41 +1,51 @@ # Using Different Reservoir Drivers + While the original implementation of the Echo State Network implemented the model using the equations of Recurrent Neural Networks to obtain non-linearity in the reservoir, other variations have been proposed in recent years. More specifically, the different drivers implemented in ReservoirComputing.jl are the multiple activation function RNN `MRNN()` and the Gated Recurrent Unit `GRU()`. To change them, it suffices to give the chosen method to the `ESN` keyword argument `reservoir_driver`. In this section, some examples, of their usage will be given, as well as a brief introduction to their equations. 
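As a minimal sketch of that keyword swap (the input matrix `training_input`, the reservoir size, and the `MRNN` parameters are stand-ins for the ones built in the examples below):

```julia
#same data and reservoir construction, different drivers
esn_mrnn = ESN(training_input;
    reservoir = RandSparseReservoir(300),
    reservoir_driver = MRNN([tanh, x -> x / sqrt(1 + x * x)], 0.85, [0.5, 0.3]))

esn_gru = ESN(training_input;
    reservoir = RandSparseReservoir(300),
    reservoir_driver = GRU())
```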
## Multiple Activation Function RNN + Based on the double activation function ESN (DAFESN) proposed in [^1], the Multiple Activation Function ESN expands the idea and allows a custom number of activation functions to be used in the reservoir dynamics. This can be thought of as a linear combination of multiple activation functions with corresponding parameters. + ```math \mathbf{x}(t+1) = (1-\alpha)\mathbf{x}(t) + \lambda_1 f_1(\mathbf{W}\mathbf{x}(t)+\mathbf{W}_{in}\mathbf{u}(t)) + \dots + \lambda_D f_D(\mathbf{W}\mathbf{x}(t)+\mathbf{W}_{in}\mathbf{u}(t)) ``` + where ``D`` is the number of activation functions and respective parameters chosen. -The method to call to use the multiple activation function ESN is `MRNN(activation_function, leaky_coefficient, scaling_factor)`. The arguments can be used as both `args` and `kwargs`. `activation_function` and `scaling_factor` have to be vectors (or tuples) containing the chosen activation functions and respective scaling factors (``f_1,...,f_D`` and ``\lambda_1,...,\lambda_D`` following the nomenclature introduced above). The `leaky_coefficient` represents ``\alpha`` and it is a single value. +The method to call to use the multiple activation function ESN is `MRNN(activation_function, leaky_coefficient, scaling_factor)`. The arguments can be used as both `args` and `kwargs`. `activation_function` and `scaling_factor` have to be vectors (or tuples) containing the chosen activation functions and respective scaling factors (``f_1,...,f_D`` and ``\lambda_1,...,\lambda_D`` following the nomenclature introduced above). The `leaky_coefficient` represents ``\alpha`` and it is a single value. Starting with the example, the data used is based on the following function based on the DAFESN paper [^1]. A full script of the example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/change_drivers/mrnn/mrnn.jl). This example was run on Julia v1.7.2. + ```@example mrnn -u(t) = sin(t)+sin(0.51*t)+sin(0.22*t)+sin(0.1002*t)+sin(0.05343*t) +u(t) = sin(t) + sin(0.51 * t) + sin(0.22 * t) + sin(0.1002 * t) + sin(0.05343 * t) ``` For this example, the type of prediction will be one step ahead. The metric used to assure a good prediction will be the normalized root-mean-square deviation `rmsd` from [StatsBase](https://juliastats.org/StatsBase.jl/stable/). Like in the other examples, first it is needed to gather the data: + ```@example mrnn train_len = 3000 predict_len = 2000 shift = 1 data = u.(collect(0.0:0.01:500)) -training_input = reduce(hcat, data[shift:shift+train_len-1]) -training_target = reduce(hcat, data[shift+1:shift+train_len]) -testing_input = reduce(hcat, data[shift+train_len:shift+train_len+predict_len-1]) -testing_target = reduce(hcat, data[shift+train_len+1:shift+train_len+predict_len]) +training_input = reduce(hcat, data[shift:(shift + train_len - 1)]) +training_target = reduce(hcat, data[(shift + 1):(shift + train_len)]) +testing_input = reduce(hcat, + data[(shift + train_len):(shift + train_len + predict_len - 1)]) +testing_target = reduce(hcat, + data[(shift + train_len + 1):(shift + train_len + predict_len)]) ``` To follow the paper more closely, it is necessary to define a couple of activation functions. The numbering of them follows the ones in the paper. Of course, one can also use any custom-defined function, available in the base language or any activation function from [NNlib](https://fluxml.ai/Flux.jl/stable/models/nnlib/#Activation-Functions). 
+ ```@example mrnn -f2(x) = (1-exp(-x))/(2*(1+exp(-x))) -f3(x) = (2/pi)*atan((pi/2)*x) -f4(x) = x/sqrt(1+x*x) +f2(x) = (1 - exp(-x)) / (2 * (1 + exp(-x))) +f3(x) = (2 / pi) * atan((pi / 2) * x) +f4(x) = x / sqrt(1 + x * x) ``` It is now possible to build different drivers, using the parameters suggested by the paper. Also, in this instance, the numbering follows the test cases of the paper. In the end, a simple for loop is implemented to compare the different drivers and activation functions. + ```@example mrnn using ReservoirComputing, Random, StatsBase @@ -47,14 +57,14 @@ base_case = RNN(tanh, 0.85) #MRNN() test cases #Parameter given as kwargs -case3 = MRNN(activation_function=[tanh, f2], - leaky_coefficient=0.85, - scaling_factor=[0.5, 0.3]) +case3 = MRNN(activation_function = [tanh, f2], + leaky_coefficient = 0.85, + scaling_factor = [0.5, 0.3]) #Parameter given as kwargs -case4 = MRNN(activation_function=[tanh, f3], - leaky_coefficient=0.9, - scaling_factor=[0.45, 0.35]) +case4 = MRNN(activation_function = [tanh, f3], + leaky_coefficient = 0.9, + scaling_factor = [0.45, 0.35]) #Parameter given as args case5 = MRNN([tanh, f4], 0.9, [0.43, 0.13]) @@ -63,23 +73,26 @@ case5 = MRNN([tanh, f4], 0.9, [0.43, 0.13]) test_cases = [base_case, case3, case4, case5] for case in test_cases esn = ESN(training_input, - input_layer = WeightedLayer(scaling=0.3), - reservoir = RandSparseReservoir(100, radius=0.4), + input_layer = WeightedLayer(scaling = 0.3), + reservoir = RandSparseReservoir(100, radius = 0.4), reservoir_driver = case, states_type = ExtendedStates()) wout = train(esn, training_target, StandardRidge(10e-6)) output = esn(Predictive(testing_input), wout) - println(rmsd(testing_target, output, normalize=true)) + println(rmsd(testing_target, output, normalize = true)) end ``` In this example, it is also possible to observe the input of parameters to the methods `RNN()` `MRNN()`, both by argument and by keyword argument. ## Gated Recurrent Unit + Gated Recurrent Units (GRUs) [^2] have been proposed in more recent years with the intent of limiting notable problems of RNNs, like the vanishing gradient. This change in the underlying equations can be easily transported into the Reservoir Computing paradigm, by switching the RNN equations in the reservoir with the GRU equations. This approach has been explored in [^3] and [^4]. Different variations of GRU have been proposed [^5][^6]; this section is subdivided into different sections that go into detail about the governing equations and the implementation of them into ReservoirComputing.jl. Like before, to access the GRU reservoir driver, it suffices to change the `reservoir_diver` keyword argument for `ESN` with `GRU()`. All the variations that will be presented can be used in this package by leveraging the keyword argument `variant` in the method `GRU()` and specifying the chosen variant: `FullyGated()` or `Minimal()`. Other variations are possible by modifying the inner layers and reservoirs. The default is set to the standard version `FullyGated()`. The first section will go into more detail about the default of the `GRU()` method, and the following ones will refer to it to minimize repetitions. This example was run on Julia v1.7.2. 
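In code, the variant is selected through the `variant` keyword of `GRU()`; a short sketch (the reservoir size is illustrative, and, as detailed below, `Minimal()` expects single layers instead of vectors):

```julia
#fully gated GRU, the default variant
gru_full = GRU(variant = FullyGated())

#minimal GRU: single (non-vector) inner layer, reservoir and bias
gru_min = GRU(variant = Minimal(),
    inner_layer = DenseLayer(),
    reservoir = RandSparseReservoir(300),
    bias = DenseLayer())
```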
### Standard GRU + The equations for the standard GRU are as follows: + ```math \mathbf{r}(t) = \sigma (\mathbf{W}^r_{\text{in}}\mathbf{u}(t)+\mathbf{W}^r\mathbf{x}(t-1)+\mathbf{b}_r) \\ \mathbf{z}(t) = \sigma (\mathbf{W}^z_{\text{in}}\mathbf{u}(t)+\mathbf{W}^z\mathbf{x}(t-1)+\mathbf{b}_z) \\ @@ -87,19 +100,22 @@ The equations for the standard GRU are as follows: \mathbf{x}(t) = \mathbf{z}(t) \odot \mathbf{x}(t-1)+(1-\mathbf{z}(t)) \odot \tilde{\mathbf{x}}(t) ``` -Going over the `GRU` keyword argument, it will be explained how to feed the desired input to the model. - - `activation_function` is a vector with default values `[NNlib.sigmoid, NNlib.sigmoid, tanh]`. This argument controls the activation functions of the GRU, going from top to bottom. Changing the first element corresponds to changing the activation function for ``\mathbf{r}(t)`` and so on. - - `inner_layer` is a vector with default values `fill(DenseLayer(), 2)`. This keyword argument controls the ``\mathbf{W}_{\text{in}}``s going from top to bottom like before. - - `reservoir` is a vector with default value `fill(RandSparseReservoir(), 2)`. In a similar fashion to `inner_layer`, this keyword argument controls the reservoir matrix construction in a top to bottom order. - - `bias` is again a vector with default value `fill(DenseLayer(), 2)`. It is meant to control the ``\mathbf{b}``s, going as usual from top to bottom. - - `variant` controls the GRU variant. The default value is set to `FullyGated()`. - -It is important to notice that `inner_layer` and `reservoir` control every layer except ``\mathbf{W}_{in}`` and ``\mathbf{W}`` and ``\mathbf{b}``. These arguments are given as input to the `ESN()` call as `input_layer`, `reservoir` and `bias`. +Going over the `GRU` keyword argument, it will be explained how to feed the desired input to the model. + + - `activation_function` is a vector with default values `[NNlib.sigmoid, NNlib.sigmoid, tanh]`. This argument controls the activation functions of the GRU, going from top to bottom. Changing the first element corresponds to changing the activation function for ``\mathbf{r}(t)`` and so on. + - `inner_layer` is a vector with default values `fill(DenseLayer(), 2)`. This keyword argument controls the ``\mathbf{W}_{\text{in}}``s going from top to bottom like before. + - `reservoir` is a vector with default value `fill(RandSparseReservoir(), 2)`. In a similar fashion to `inner_layer`, this keyword argument controls the reservoir matrix construction in a top to bottom order. + - `bias` is again a vector with default value `fill(DenseLayer(), 2)`. It is meant to control the ``\mathbf{b}``s, going as usual from top to bottom. + - `variant` controls the GRU variant. The default value is set to `FullyGated()`. + +It is important to notice that `inner_layer` and `reservoir` control every layer except ``\mathbf{W}_{in}`` and ``\mathbf{W}`` and ``\mathbf{b}``. These arguments are given as input to the `ESN()` call as `input_layer`, `reservoir` and `bias`. 
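Putting the list above together, a fully spelled-out driver mirroring the defaults might look like the following sketch (assuming `NNlib` is available for `sigmoid`; the reservoir size is an illustrative choice, since the actual defaults use unsized constructors):

```julia
using NNlib: sigmoid

#every keyword from the list above written out explicitly
gru_driver = GRU(activation_function = [sigmoid, sigmoid, tanh],
    inner_layer = fill(DenseLayer(), 2),
    reservoir = fill(RandSparseReservoir(300), 2),
    bias = fill(DenseLayer(), 2),
    variant = FullyGated())
```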
The following sections are going to illustrate the variations of the GRU architecture and how to obtain them in ReservoirComputing.jl ### Type 1 + The first variation of the GRU is dependent only on the previous hidden state and the bias: + ```math \mathbf{r}(t) = \sigma (\mathbf{W}^r\mathbf{x}(t-1)+\mathbf{b}_r) \\ \mathbf{z}(t) = \sigma (\mathbf{W}^z\mathbf{x}(t-1)+\mathbf{b}_z) \\ @@ -108,7 +124,9 @@ The first variation of the GRU is dependent only on the previous hidden state an To obtain this variation, it will suffice to set `inner_layer = fill(NullLayer(), 2)` and leaving the `variant = FullyGated()`. ### Type 2 + The second variation only depends on the previous hidden state: + ```math \mathbf{r}(t) = \sigma (\mathbf{W}^r\mathbf{x}(t-1)) \\ \mathbf{z}(t) = \sigma (\mathbf{W}^z\mathbf{x}(t-1)) \\ @@ -117,7 +135,9 @@ The second variation only depends on the previous hidden state: Similarly to before, to obtain this variation, it is only required to set `inner_layer = fill(NullLayer(), 2)` and `bias = fill(NullLayer(), 2)` while keeping `variant = FullyGated()`. ### Type 3 + The final variation, before the minimal one, depends only on the biases + ```math \mathbf{r}(t) = \sigma (\mathbf{b}_r) \\ \mathbf{z}(t) = \sigma (\mathbf{b}_z) \\ @@ -125,8 +145,10 @@ The final variation, before the minimal one, depends only on the biases This means that it is only needed to set `inner_layer = fill(NullLayer(), 2)` and `reservoir = fill(NullReservoir(), 2)` while keeping `variant = FullyGated()`. -### Minimal +### Minimal + The minimal GRU variation merges two gates into one: + ```math \mathbf{f}(t) = \sigma (\mathbf{W}^f_{\text{in}}\mathbf{u}(t)+\mathbf{W}^f\mathbf{x}(t-1)+\mathbf{b}_f) \\ \tilde{\mathbf{x}}(t) = \text{tanh}(\mathbf{W}_{in}\mathbf{u}(t)+\mathbf{W}(\mathbf{f}(t) \odot \mathbf{x}(t-1))+\mathbf{b}) \\ @@ -136,9 +158,11 @@ The minimal GRU variation merges two gates into one: This variation can be obtained by setting `variation=Minimal()`. The `inner_layer`, `reservoir` and `bias` kwargs this time are **not** vectors, but must be defined like, for example `inner_layer = DenseLayer()` or `reservoir = SparseDenseReservoir()`. ### Examples -To showcase the use of the `GRU()` method, this section will only illustrate the standard `FullyGated()` version. The full script for this example with the data can be found [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/change_drivers/gru/). -The data used for this example is the Santa Fe laser dataset [^7] retrieved from [here](https://web.archive.org/web/20160427182805/http://www-psych.stanford.edu/~andreas/Time-Series/SantaFe.html). The data is split to account for a next step prediction. +To showcase the use of the `GRU()` method, this section will only illustrate the standard `FullyGated()` version. The full script for this example with the data can be found [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/change_drivers/gru/). + +The data used for this example is the Santa Fe laser dataset [^7] retrieved from [here](https://web.archive.org/web/20160427182805/http://www-psych.stanford.edu/%7Eandreas/Time-Series/SantaFe.html). The data is split to account for a next step prediction. 
+ ```@example gru using DelimitedFiles @@ -148,12 +172,13 @@ train_len = 5000 predict_len = 2000 training_input = data[:, 1:train_len] -training_target = data[:, 2:train_len+1] -testing_input = data[:,train_len+1:train_len+predict_len] -testing_target = data[:,train_len+2:train_len+predict_len+1] +training_target = data[:, 2:(train_len + 1)] +testing_input = data[:, (train_len + 1):(train_len + predict_len)] +testing_target = data[:, (train_len + 2):(train_len + predict_len + 1)] ``` -The construction of the ESN proceeds as usual. +The construction of the ESN proceeds as usual. + ```@example gru using ReservoirComputing, Random @@ -161,22 +186,24 @@ res_size = 300 res_radius = 1.4 Random.seed!(42) -esn = ESN(training_input; - reservoir = RandSparseReservoir(res_size, radius=res_radius), +esn = ESN(training_input; + reservoir = RandSparseReservoir(res_size, radius = res_radius), reservoir_driver = GRU()) ``` The default inner reservoir and input layer for the GRU are the same defaults for the `reservoir` and `input_layer` of the ESN. One can use the explicit call if they choose to. + ```@example gru -gru = GRU(reservoir=[RandSparseReservoir(res_size), - RandSparseReservoir(res_size)], - inner_layer=[DenseLayer(), DenseLayer()]) -esn = ESN(training_input; - reservoir = RandSparseReservoir(res_size, radius=res_radius), +gru = GRU(reservoir = [RandSparseReservoir(res_size), + RandSparseReservoir(res_size)], + inner_layer = [DenseLayer(), DenseLayer()]) +esn = ESN(training_input; + reservoir = RandSparseReservoir(res_size, radius = res_radius), reservoir_driver = gru) ``` The training and prediction can proceed as usual: + ```@example gru training_method = StandardRidge(0.0) output_layer = train(esn, training_target, training_method) @@ -184,25 +211,27 @@ output = esn(Predictive(testing_input), output_layer) ``` The results can be plotted using Plots.jl + ```@example gru using Plots -plot([testing_target' output'], label=["actual" "predicted"], - plot_title="Santa Fe Laser", - titlefontsize=20, - legendfontsize=12, - linewidth=2.5, +plot([testing_target' output'], label = ["actual" "predicted"], + plot_title = "Santa Fe Laser", + titlefontsize = 20, + legendfontsize = 12, + linewidth = 2.5, xtickfontsize = 12, ytickfontsize = 12, - size=(1080, 720)) + size = (1080, 720)) ``` It is interesting to see a comparison of the GRU driven ESN and the standard RNN driven ESN. Using the same parameters defined before it is possible to do the following + ```@example gru using StatsBase -esn_rnn = ESN(training_input; - reservoir = RandSparseReservoir(res_size, radius=res_radius), +esn_rnn = ESN(training_input; + reservoir = RandSparseReservoir(res_size, radius = res_radius), reservoir_driver = RNN()) output_layer = train(esn_rnn, training_target, training_method) diff --git a/docs/src/esn_tutorials/hybrid.md b/docs/src/esn_tutorials/hybrid.md index 2a7b72b7..bf274f01 100644 --- a/docs/src/esn_tutorials/hybrid.md +++ b/docs/src/esn_tutorials/hybrid.md @@ -1,48 +1,54 @@ # Hybrid Echo State Networks + Following the idea of giving physical information to machine learning models, the hybrid echo state networks [^1] try to achieve this results by feeding model data into the ESN. In this example, it is explained how to create and leverage such models in ReservoirComputing.jl. The full script for this example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/hybrid/hybrid.jl). This example was run on Julia v1.7.2. 
## Generating the data + For this example, we are going to forecast the Lorenz system. As usual, the data is generated leveraging `DifferentialEquations.jl`: + ```@example hybrid using DifferentialEquations -u0 = [1.0,0.0,0.0] -tspan = (0.0,1000.0) +u0 = [1.0, 0.0, 0.0] +tspan = (0.0, 1000.0) datasize = 100000 -tsteps = range(tspan[1], tspan[2], length = datasize) +tsteps = range(tspan[1], tspan[2], length = datasize) -function lorenz(du,u,p,t) - p = [10.0,28.0,8/3] - du[1] = p[1]*(u[2]-u[1]) - du[2] = u[1]*(p[2]-u[3]) - u[2] - du[3] = u[1]*u[2] - p[3]*u[3] +function lorenz(du, u, p, t) + p = [10.0, 28.0, 8 / 3] + du[1] = p[1] * (u[2] - u[1]) + du[2] = u[1] * (p[2] - u[3]) - u[2] + du[3] = u[1] * u[2] - p[3] * u[3] end ode_prob = ODEProblem(lorenz, u0, tspan) ode_sol = solve(ode_prob, saveat = tsteps) -ode_data =Array(ode_sol) +ode_data = Array(ode_sol) train_len = 10000 -input_data = ode_data[:, 1:train_len] -target_data = ode_data[:, 2:train_len+1] -test_data = ode_data[:, train_len+1:end][:, 1:1000] +input_data = ode_data[:, 1:train_len] +target_data = ode_data[:, 2:(train_len + 1)] +test_data = ode_data[:, (train_len + 1):end][:, 1:1000] predict_len = size(test_data, 2) tspan_train = (tspan[1], ode_sol.t[train_len]) ``` ## Building the Hybrid Echo State Network + To feed the data to the ESN, it is necessary to create a suitable function. + ```@example hybrid function prior_model_data_generator(u0, tspan, tsteps, model = lorenz) - prob = ODEProblem(lorenz, u0, tspan) + prob = ODEProblem(lorenz, u0, tspan) sol = Array(solve(prob, saveat = tsteps)) return sol end ``` Given the initial condition, time span, and time steps, this function returns the data for the chosen model. Now, using the `Hybrid` method, it is possible to input all this information to the model. 
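Concretely, the `Hybrid` variation bundles the prior model with its initial condition, training time span, and number of model data points; a sketch of the call (the constructor signature follows the `Hybrid` docstring in the API documentation and is assumed here):

```julia
#assumed signature from the Hybrid docstring: prior model, initial condition,
#training time span and number of model data points
hybrid_variation = Hybrid(prior_model_data_generator, u0, tspan_train, train_len)
```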
+ ```@example hybrid using ReservoirComputing, Random Random.seed!(42) @@ -55,31 +61,35 @@ esn = ESN(input_data, ``` ## Training and Prediction + The training and prediction of the Hybrid ESN can proceed as usual: + ```@example hybrid output_layer = train(esn, target_data, StandardRidge(0.3)) output = esn(Generative(predict_len), output_layer) ``` It is now possible to plot the results, leveraging Plots.jl: + ```@example hybrid using Plots lorenz_maxlyap = 0.9056 -predict_ts = tsteps[train_len+1:train_len+predict_len] -lyap_time = (predict_ts .- predict_ts[1])*(1/lorenz_maxlyap) - -p1 = plot(lyap_time, [test_data[1,:] output[1,:]], label = ["actual" "predicted"], - ylabel = "x(t)", linewidth=2.5, xticks=false, yticks = -15:15:15); -p2 = plot(lyap_time, [test_data[2,:] output[2,:]], label = ["actual" "predicted"], - ylabel = "y(t)", linewidth=2.5, xticks=false, yticks = -20:20:20); -p3 = plot(lyap_time, [test_data[3,:] output[3,:]], label = ["actual" "predicted"], - ylabel = "z(t)", linewidth=2.5, xlabel = "max(λ)*t", yticks = 10:15:40); - - -plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", - layout=(3,1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize=15, yguidefontsize=15, - legendfontsize=12, titlefontsize=20) +predict_ts = tsteps[(train_len + 1):(train_len + predict_len)] +lyap_time = (predict_ts .- predict_ts[1]) * (1 / lorenz_maxlyap) + +p1 = plot(lyap_time, [test_data[1, :] output[1, :]], label = ["actual" "predicted"], + ylabel = "x(t)", linewidth = 2.5, xticks = false, yticks = -15:15:15); +p2 = plot(lyap_time, [test_data[2, :] output[2, :]], label = ["actual" "predicted"], + ylabel = "y(t)", linewidth = 2.5, xticks = false, yticks = -20:20:20); +p3 = plot(lyap_time, [test_data[3, :] output[3, :]], label = ["actual" "predicted"], + ylabel = "z(t)", linewidth = 2.5, xlabel = "max(λ)*t", yticks = 10:15:40); + +plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", + layout = (3, 1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize = 15, + yguidefontsize = 15, + legendfontsize = 12, titlefontsize = 20) ``` ## Bibliography + [^1]: Pathak, Jaideep, et al. "_Hybrid forecasting of chaotic processes: Using machine learning in conjunction with a knowledge-based model._" Chaos: An Interdisciplinary Journal of Nonlinear Science 28.4 (2018): 041101. diff --git a/docs/src/esn_tutorials/lorenz_basic.md b/docs/src/esn_tutorials/lorenz_basic.md index 84b53220..f820a36d 100644 --- a/docs/src/esn_tutorials/lorenz_basic.md +++ b/docs/src/esn_tutorials/lorenz_basic.md @@ -1,25 +1,28 @@ - # Lorenz System Forecasting - -This example expands on the readme Lorenz system forecasting to better showcase how to use methods and functions provided in the library for Echo State Networks. Here the prediction method used is ```Generative```, for a more detailed explanation of the differences between ```Generative``` and ```Predictive``` please refer to the other examples given in the documentation. The full script for this example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/lorenz_basic/lorenz_basic.jl). This example was run on Julia v1.7.2. +# Lorenz System Forecasting + +This example expands on the readme Lorenz system forecasting to better showcase how to use methods and functions provided in the library for Echo State Networks. Here the prediction method used is `Generative`, for a more detailed explanation of the differences between `Generative` and `Predictive` please refer to the other examples given in the documentation. 
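In short, the two prediction types differ only in what is handed to the trained ESN; a minimal sketch, assuming a trained `esn`, its `output_layer`, and a `testing_input` matrix as in the other tutorials:

```julia
#Generative: the model is fed back its own output for predict_len steps
output_gen = esn(Generative(predict_len), output_layer)
#Predictive: one output column is produced per column of the supplied input matrix
output_pred = esn(Predictive(testing_input), output_layer)
```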
The full script for this example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/lorenz_basic/lorenz_basic.jl). This example was run on Julia v1.7.2. ## Generating the data -Starting off the workflow, the first step is to obtain the data. Leveraging ```OrdinaryDiffEq``` it is possible to derive the Lorenz system data in the following way: + +Starting off the workflow, the first step is to obtain the data. Leveraging `OrdinaryDiffEq` it is possible to derive the Lorenz system data in the following way: + ```@example lorenz using OrdinaryDiffEq #define lorenz system -function lorenz!(du,u,p,t) - du[1] = 10.0*(u[2]-u[1]) - du[2] = u[1]*(28.0-u[3]) - u[2] - du[3] = u[1]*u[2] - (8/3)*u[3] +function lorenz!(du, u, p, t) + du[1] = 10.0 * (u[2] - u[1]) + du[2] = u[1] * (28.0 - u[3]) - u[2] + du[3] = u[1] * u[2] - (8 / 3) * u[3] end #solve and take data -prob = ODEProblem(lorenz!, [1.0,0.0,0.0], (0.0,200.0)) -data = solve(prob, ABM54(), dt=0.02) +prob = ODEProblem(lorenz!, [1.0, 0.0, 0.0], (0.0, 200.0)) +data = solve(prob, ABM54(), dt = 0.02) ``` -After obtaining the data, it is necessary to determine the kind of prediction for the model. Since this example will use the ```Generative``` prediction type, this means that the target data will be the next step of the input data. In addition, it is important to notice that the Lorenz system just obtained presents a transient period that is not representative of the general behavior of the system. This can easily be discarded by setting a ```shift``` parameter. +After obtaining the data, it is necessary to determine the kind of prediction for the model. Since this example will use the `Generative` prediction type, this means that the target data will be the next step of the input data. In addition, it is important to notice that the Lorenz system just obtained presents a transient period that is not representative of the general behavior of the system. This can easily be discarded by setting a `shift` parameter. + ```@example lorenz #determine shift length, training length and prediction length shift = 300 @@ -27,50 +30,56 @@ train_len = 5000 predict_len = 1250 #split the data accordingly -input_data = data[:, shift:shift+train_len-1] -target_data = data[:, shift+1:shift+train_len] -test_data = data[:,shift+train_len+1:shift+train_len+predict_len] +input_data = data[:, shift:(shift + train_len - 1)] +target_data = data[:, (shift + 1):(shift + train_len)] +test_data = data[:, (shift + train_len + 1):(shift + train_len + predict_len)] ``` -It is *important* to notice that the data needs to be formatted in a matrix with the features as rows and time steps as columns as in this example. This is needed even if the time series consists of single values. +It is *important* to notice that the data needs to be formatted in a matrix with the features as rows and time steps as columns as in this example. This is needed even if the time series consists of single values. ## Building the Echo State Network -Once the data is ready, it is possible to define the parameters for the ESN and the ```ESN``` struct itself. In this example, the values from [^1] are loosely followed as general guidelines. + +Once the data is ready, it is possible to define the parameters for the ESN and the `ESN` struct itself. In this example, the values from [^1] are loosely followed as general guidelines. 
+ ```@example lorenz using ReservoirComputing #define ESN parameters res_size = 300 res_radius = 1.2 -res_sparsity = 6/300 +res_sparsity = 6 / 300 input_scaling = 0.1 #build ESN struct -esn = ESN(input_data; +esn = ESN(input_data; variation = Default(), - reservoir = RandSparseReservoir(res_size, radius=res_radius, sparsity=res_sparsity), - input_layer = WeightedLayer(scaling=input_scaling), + reservoir = RandSparseReservoir(res_size, radius = res_radius, sparsity = res_sparsity), + input_layer = WeightedLayer(scaling = input_scaling), reservoir_driver = RNN(), nla_type = NLADefault(), states_type = StandardStates()) ``` -Most of the parameters chosen here mirror the default ones, so a direct call is not necessary. The readme example is identical to this one, except for the explicit call. Going line by line to see what is happening, starting from ```res_size```: this value determines the dimensions of the reservoir matrix. In this case, a size of 300 has been chosen, so the reservoir matrix will be 300 x 300. This is not always the case, since some input layer constructions can modify the dimensions of the reservoir, but in that case, everything is taken care of internally. +Most of the parameters chosen here mirror the default ones, so a direct call is not necessary. The readme example is identical to this one, except for the explicit call. Going line by line to see what is happening, starting from `res_size`: this value determines the dimensions of the reservoir matrix. In this case, a size of 300 has been chosen, so the reservoir matrix will be 300 x 300. This is not always the case, since some input layer constructions can modify the dimensions of the reservoir, but in that case, everything is taken care of internally. + +The `res_radius` determines the scaling of the spectral radius of the reservoir matrix; a proper scaling is necessary to assure the Echo State Property. The default value in the `RandSparseReservoir()` method is 1.0 in accordance with the most commonly followed guidelines found in the literature (see [^2] and references therein). The `sparsity` of the reservoir matrix in this case is obtained by choosing a degree of connections and dividing that by the reservoir size. Of course, it is also possible to simply choose any value between 0.0 and 1.0 to test behaviors for different sparsity values. In this example, the call to the parameters inside `RandSparseReservoir()` was done explicitly to showcase the meaning of each of them, but it is also possible to simply pass the values directly, like so `RandSparseReservoir(1.2, 6/300)`. -The ```res_radius``` determines the scaling of the spectral radius of the reservoir matrix; a proper scaling is necessary to assure the Echo State Property. The default value in the ```RandSparseReservoir()``` method is 1.0 in accordance with the most commonly followed guidelines found in the literature (see [^2] and references therein). The ```sparsity``` of the reservoir matrix in this case is obtained by choosing a degree of connections and dividing that by the reservoir size. Of course, it is also possible to simply choose any value between 0.0 and 1.0 to test behaviors for different sparsity values. In this example, the call to the parameters inside ```RandSparseReservoir()``` was done explicitly to showcase the meaning of each of them, but it is also possible to simply pass the values directly, like so ```RandSparseReservoir(1.2, 6/300)```. 
+
+The value of `input_scaling` determines the upper and lower bounds of the uniform distribution of the weights in the `WeightedLayer()`. Like before, this value can be passed either as an argument or as a keyword argument `WeightedLayer(0.1)`. The value of 0.1 represents the default. The default input layer is the `DenseLayer`, a fully connected layer. The details of the weighted version can be found in [^3]; for this example, this version returns the best results.
 
-The value of ```input_scaling``` determines the upper and lower bounds of the uniform distribution of the weights in the ```WeightedLayer()```. Like before, this value can be passed either as an argument or as a keyword argument ```WeightedLayer(0.1)```. The value of 0.1 represents the default. The default input layer is the ```DenseLayer```, a fully connected layer. The details of the weighted version can be found in [^3], for this example, this version returns the best results.
+The reservoir driver represents the dynamics of the reservoir. In the standard ESN definition, these dynamics are obtained through a Recurrent Neural Network (RNN), and this is reflected by calling the `RNN` driver for the `ESN` struct. This option is set as the default, and it does not need to be specified unless its parameters have to be changed. The full equation is the following:
 
-The reservoir driver represents the dynamics of the reservoir. In the standard ESN definition, these dynamics are obtained through a Recurrent Neural Network (RNN), and this is reflected by calling the ```RNN``` driver for the ```ESN``` struct. This option is set as the default, and unless there is the need to change parameters, it is not needed. The full equation is the following:
 ```math
 \textbf{x}(t+1) = (1-\alpha)\textbf{x}(t) + \alpha \cdot \text{tanh}(\textbf{W}\textbf{x}(t)+\textbf{W}_{\text{in}}\textbf{u}(t))
 ```
 
-where ``α`` represents the leaky coefficient, and tanh can be any activation function. Also, ``\textbf{x}`` represents the state vector, ``\textbf{u}`` the input data, and ``\textbf{W}, \textbf{W}_{\text{in}}`` are the reservoir matrix and input matrix, respectively. The default call to the RNN in the library is the following ```RNN(;activation_function=tanh, leaky_coefficient=1.0)```, where the meaning of the parameters is clear from the equation above. Instead of the hyperbolic tangent, any activation function can be used, either leveraging external libraries such as ```NNlib``` or creating a custom one.
-The final calls are modifications to the states in training or prediction. The default calls, depicted in the example, do not make any modifications to the states. This is the safest bet if one is not sure how these work. The ```nla_type``` applies a non-linear algorithm to the states, while the ```states_type``` can expand them by concatenating them with the input data, or padding them by concatenating a constant value to all the states. More in depth descriptions of these parameters are given in other examples in the documentation.
+where ``α`` represents the leaky coefficient, and tanh can be any activation function. Also, ``\textbf{x}`` represents the state vector, ``\textbf{u}`` the input data, and ``\textbf{W}, \textbf{W}_{\text{in}}`` are the reservoir matrix and input matrix, respectively. The default call to the RNN in the library is the following `RNN(;activation_function=tanh, leaky_coefficient=1.0)`, where the meaning of the parameters is clear from the equation above. 
Instead of the hyperbolic tangent, any activation function can be used, either leveraging external libraries such as `NNlib` or creating a custom one.
+
+The final calls are modifications to the states in training or prediction. The default calls, depicted in the example, do not make any modifications to the states. This is the safest bet if one is not sure how these work. The `nla_type` applies a non-linear algorithm to the states, while the `states_type` can expand them by concatenating them with the input data, or padding them by concatenating a constant value to all the states. More in depth descriptions of these parameters are given in other examples in the documentation.
 
 ## Training and Prediction
+
 Now that the ESN has been created and all the parameters have been explained, it is time to proceed with the training. The full call of the readme example follows this general idea:
+
 ```@example lorenz
 #define training method
 training_method = StandardRidge(0.0)
@@ -79,37 +88,41 @@ training_method = StandardRidge(0.0)
 
 #train ESN
 output_layer = train(esn, target_data, training_method)
 ```
 
-The training returns an ```OutputLayer``` struct containing the trained output matrix and other needed for the prediction. The necessary elements in the ```train()``` call are the ```ESN``` struct created in the previous step and the ```target_data```, which in this case is the one step ahead evolution of the Lorenz system. The training method chosen in this example is the standard one, so an equivalent way of calling the ```train``` function here is ```output_layer = train(esn, target_data)``` like the readme basic version. Likewise, the default value for the ridge regression parameter is set to zero, so the actual default training is Ordinary Least Squares regression. Other training methods are available and will be explained in the following examples.
+The training returns an `OutputLayer` struct containing the trained output matrix and the other elements needed for the prediction. The necessary elements in the `train()` call are the `ESN` struct created in the previous step and the `target_data`, which in this case is the one step ahead evolution of the Lorenz system. The training method chosen in this example is the standard one, so an equivalent way of calling the `train` function here is `output_layer = train(esn, target_data)` like the readme basic version. Likewise, the default value for the ridge regression parameter is set to zero, so the actual default training is Ordinary Least Squares regression. Other training methods are available and will be explained in the following examples.
+
+Once the `OutputLayer` has been obtained, the prediction can be done following this procedure:
 
-Once the ```OutputLayer``` has been obtained, the prediction can be done following this procedure:
 ```@example lorenz
 output = esn(Generative(predict_len), output_layer)
 ```
-both the training method and the output layer are needed in this call. The number of steps for the prediction must be specified in the ```Generative``` method. The output results are given in a matrix.
+
+Both the prediction method and the output layer are needed in this call. The number of steps for the prediction must be specified in the `Generative` method. The output results are given in a matrix.
 
 !!! info "Saving the states during prediction"
+
     While the states are saved in the `ESN` struct for the training, for the prediction they are not saved by default. 
To inspect the states, it is necessary to pass the boolean keyword argument `save_states` to the prediction call, in this example using `esn(... ; save_states=true)`. This returns a tuple `(output, states)` where `size(states) = res_size, prediction_len` -To inspect the results, they can easily be plotted using an external library. In this case, ```Plots``` is adopted: +To inspect the results, they can easily be plotted using an external library. In this case, `Plots` is adopted: + ```@example lorenz using Plots, Plots.PlotMeasures ts = 0.0:0.02:200.0 lorenz_maxlyap = 0.9056 -predict_ts = ts[shift+train_len+1:shift+train_len+predict_len] -lyap_time = (predict_ts .- predict_ts[1])*(1/lorenz_maxlyap) - -p1 = plot(lyap_time, [test_data[1,:] output[1,:]], label = ["actual" "predicted"], - ylabel = "x(t)", linewidth=2.5, xticks=false, yticks = -15:15:15); -p2 = plot(lyap_time, [test_data[2,:] output[2,:]], label = ["actual" "predicted"], - ylabel = "y(t)", linewidth=2.5, xticks=false, yticks = -20:20:20); -p3 = plot(lyap_time, [test_data[3,:] output[3,:]], label = ["actual" "predicted"], - ylabel = "z(t)", linewidth=2.5, xlabel = "max(λ)*t", yticks = 10:15:40); - - -plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", - layout=(3,1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize=15, yguidefontsize=15, - legendfontsize=12, titlefontsize=20) +predict_ts = ts[(shift + train_len + 1):(shift + train_len + predict_len)] +lyap_time = (predict_ts .- predict_ts[1]) * (1 / lorenz_maxlyap) + +p1 = plot(lyap_time, [test_data[1, :] output[1, :]], label = ["actual" "predicted"], + ylabel = "x(t)", linewidth = 2.5, xticks = false, yticks = -15:15:15); +p2 = plot(lyap_time, [test_data[2, :] output[2, :]], label = ["actual" "predicted"], + ylabel = "y(t)", linewidth = 2.5, xticks = false, yticks = -20:20:20); +p3 = plot(lyap_time, [test_data[3, :] output[3, :]], label = ["actual" "predicted"], + ylabel = "z(t)", linewidth = 2.5, xlabel = "max(λ)*t", yticks = 10:15:40); + +plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", + layout = (3, 1), xtickfontsize = 12, ytickfontsize = 12, xguidefontsize = 15, + yguidefontsize = 15, + legendfontsize = 12, titlefontsize = 20) ``` ## Bibliography @@ -117,4 +130,3 @@ plot(p1, p2, p3, plot_title = "Lorenz System Coordinates", [^1]: Pathak, Jaideep, et al. "_Using machine learning to replicate chaotic attractors and calculate Lyapunov exponents from data._" Chaos: An Interdisciplinary Journal of Nonlinear Science 27.12 (2017): 121102. [^2]: Lukoševičius, Mantas. "_A practical guide to applying echo state networks._" Neural networks: Tricks of the trade. Springer, Berlin, Heidelberg, 2012. 659-686. [^3]: Lu, Zhixin, et al. "_Reservoir observers: Model-free inference of unmeasured variables in chaotic systems._" Chaos: An Interdisciplinary Journal of Nonlinear Science 27.4 (2017): 041102. - diff --git a/docs/src/general/different_training.md b/docs/src/general/different_training.md index 61f0a519..bb92680c 100644 --- a/docs/src/general/different_training.md +++ b/docs/src/general/different_training.md @@ -1,5 +1,7 @@ # Changing Training Algorithms + Notably Echo State Networks have been trained with Ridge Regression algorithms, but the range of useful algorithms to use is much greater. In this section of the documentation, it is possible to explore how to use other training methods to obtain the readout layer. All the methods implemented in ReservoirComputing.jl can be used for all models in the library, not only ESNs. 
The general workflow illustrated in this section will be based on a dummy RC model `my_model = MyModel(...)` that needs training to obtain the readout layer. The training is done as follows: + ```julia training_algo = TrainingAlgo() readout_layer = train(my_model, train_data, training_algo) @@ -8,20 +10,23 @@ readout_layer = train(my_model, train_data, training_algo) In this section, it is possible to explore how to properly build the `training_algo` and all the possible choices available. In the example section of the documentation it will be provided copy-pasteable code to better explore the training algorithms and their impact on the model. ## Linear Models + The library includes a standard implementation of ridge regression, callable using `StandardRidge(regularization_coeff)`. The default regularization coefficient is set to zero. This is also the default model called when no model is specified in `train()`. This makes the default call for training `train(my_model, train_data)` use Ordinary Least Squares (OLS) for regression. Leveraging [MLJLinearModels](https://juliaai.github.io/MLJLinearModels.jl/stable/) you can expand your choices of linear models for training. The wrappers provided follow this structure: + ```julia struct LinearModel - regression - solver - regression_kwargs + regression::Any + solver::Any + regression_kwargs::Any end ``` + To call the ridge regression using the MLJLinearModels APIs, you can use `LinearModel(;regression=LinearRegression)`. You can also choose a specific solver by calling, for example, `LinearModel(regression=LinearRegression, solver=Analytical())`. For all the available solvers, please refer to the [MLJLinearModels documentation](https://juliaai.github.io/MLJLinearModels.jl/stable/models/). To change the regularization coefficient in the ridge example, using for example `lambda = 0.1`, it is needed to pass it in the `regression_kwargs` like so `LinearModel(;regression=LinearRegression, solver=Analytical(), regression_kwargs=(lambda=lambda))`. The nomenclature of the coefficients must follow the MLJLinearModels APIs, using `lambda, gamma` for `LassoRegression` and `delta, lambda, gamma` for `HuberRegression`. Again, please check the [relevant documentation](https://juliaai.github.io/MLJLinearModels.jl/stable/api/) if in doubt. When using MLJLinearModels based regressors, do remember to specify `using MLJLinearModels`. - ## Support Vector Regression -Contrary to the `LinearModel`s, no wrappers are needed for support vector regression. By using [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl), LIBSVM wrappers in Julia, it is possible to call both `epsilonSVR()` or `nuSVR()` directly in `train()`. For the full range of kernels provided and the parameters to call, we refer the user to the official [documentation](https://www.csie.ntu.edu.tw/~cjlin/libsvm/). Like before, if one intends to use LIBSVM regressors, it is necessary to specify `using LIBSVM`. + +Contrary to the `LinearModel`s, no wrappers are needed for support vector regression. By using [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl), LIBSVM wrappers in Julia, it is possible to call both `epsilonSVR()` or `nuSVR()` directly in `train()`. For the full range of kernels provided and the parameters to call, we refer the user to the official [documentation](https://www.csie.ntu.edu.tw/%7Ecjlin/libsvm/). Like before, if one intends to use LIBSVM regressors, it is necessary to specify `using LIBSVM`. 
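To make the training options above concrete, here is a minimal sketch of the calls described in this section; `my_model` and `train_data` are the placeholder names used above, not actual exports of the library.

```julia
using ReservoirComputing

# default call: Ordinary Least Squares (StandardRidge with zero regularization)
readout_ols = train(my_model, train_data)

# built-in ridge regression with an explicit regularization coefficient
readout_ridge = train(my_model, train_data, StandardRidge(1.0e-6))

# the MLJLinearModels wrapper described above (here with plain linear regression)
using MLJLinearModels
readout_mlj = train(my_model, train_data,
    LinearModel(; regression = LinearRegression, solver = Analytical()))
```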
diff --git a/docs/src/general/predictive_generative.md b/docs/src/general/predictive_generative.md index 790d3d88..c3be7b26 100644 --- a/docs/src/general/predictive_generative.md +++ b/docs/src/general/predictive_generative.md @@ -1,14 +1,17 @@ # Generative vs Predictive + The library provides two different methods for prediction, denoted as `Predictive()` and `Generative()`. These methods correspond to the two major applications of Reservoir Computing models found in the literature. This section aims to clarify the differences between these two methods before providing further details on their usage in the library. ## Predictive + In the first method, users can utilize Reservoir Computing models in a manner similar to standard Machine Learning models. This involves using a set of features as input and a set of labels as outputs. In this case, both the feature and label sets can consist of vectors of different dimensions. Specifically, let's denote the feature set as ``X=\{x_1,...,x_n\}`` where ``x_i \in \mathbb{R}^{N}``, and the label set as ``Y=\{y_1,...,y_n\}`` where ``y_i \in \mathbb{R}^{M}``. To make predictions using this method, you need to provide the feature set that you want to predict the labels for. For example, you can call `Predictive(X)` using the feature set ``X`` as input. This method allows for both one-step-ahead and multi-step-ahead predictions. ## Generative + The generative method provides a different approach to forecasting with Reservoir Computing models. It enables you to extend the forecasting capabilities of the model by allowing predicted results to be fed back into the model to generate the next prediction. This autonomy allows the model to make predictions without the need for a feature dataset as input. To use the generative method, you only need to specify the number of time steps that you intend to forecast. For instance, you can call `Generative(100)` to generate predictions for the next one hundred time steps. -The key distinction between these methods lies in how predictions are made. The predictive method relies on input feature sets to make predictions, while the generative method allows for autonomous forecasting by feeding predicted results back into the model. \ No newline at end of file +The key distinction between these methods lies in how predictions are made. The predictive method relies on input feature sets to make predictions, while the generative method allows for autonomous forecasting by feeding predicted results back into the model. diff --git a/docs/src/general/states_variation.md b/docs/src/general/states_variation.md index 07a933ec..a1b0dab5 100644 --- a/docs/src/general/states_variation.md +++ b/docs/src/general/states_variation.md @@ -1,12 +1,15 @@ # Altering States + In ReservoirComputing models, it's possible to perform alterations on the reservoir states during the training stage. These alterations can improve prediction results or replicate results found in the literature. Alterations are categorized into two possibilities: padding or extending the states, and applying non-linear algorithms to the states. ## Padding and Extending States + ### Extending States Extending the states involves appending the corresponding input values to the reservoir states. If \(\textbf{x}(t)\) represents the reservoir state at time \(t\) corresponding to the input \(\textbf{u}(t)\), the extended state is represented as \([\textbf{x}(t); \textbf{u}(t)]\), where \([;]\) denotes vertical concatenation. 
This procedure is commonly used in Echo State Networks and is described in [Jaeger's Scholarpedia](http://www.scholarpedia.org/article/Echo_state_network). You can extend the states in every ReservoirComputing.jl model by using the `states_type` keyword argument and calling the `ExtendedStates()` method. No additional arguments are needed. ### Padding States + Padding the states involves appending a constant value, such as 1.0, to each state. In the notation introduced earlier, padded states can be represented as \([\textbf{x}(t); 1.0]\). This approach is detailed in the [seminal guide](https://mantas.info/get-publication/?f=Practical_ESN.pdf) to Echo State Networks by Mantas Lukoševičius. To pad the states, you can use the `states_type` keyword argument and call the `PaddedStates(padding)` method, where `padding` represents the value to be concatenated to the states. By default, the padding value is set to 1.0, so most of the time, calling `PaddedStates()` will suffice. Additionally, you can pad the extended states by using the `PaddedExtendedStates(padding)` method, which also has a default padding value of 1.0. @@ -14,34 +17,37 @@ Additionally, you can pad the extended states by using the `PaddedExtendedStates You can choose not to apply any of these changes to the states by calling `StandardStates()`, which is the default choice for the states. ## Non-Linear Algorithms + First introduced in [^1] and expanded in [^2], non-linear algorithms are nonlinear combinations of the columns of the matrix states. There are three such algorithms implemented in ReservoirComputing.jl, and you can choose which one to use with the `nla_type` keyword argument. The default value is set to `NLADefault()`, which means no non-linear algorithm is applied. The available non-linear algorithms are: -- `NLAT1()` -- `NLAT2()` -- `NLAT3()` + - `NLAT1()` + - `NLAT2()` + - `NLAT3()` -These algorithms perform specific operations on the reservoir states. To provide a better understanding of what they do, let ``\textbf{x}_{i, j}`` be elements of the state matrix, with ``i=1,...,T \ j=1,...,N`` where ``T`` is the length of the training and ``N`` is the reservoir size. +These algorithms perform specific operations on the reservoir states. To provide a better understanding of what they do, let ``\textbf{x}_{i, j}`` be elements of the state matrix, with ``i=1,...,T \ j=1,...,N`` where ``T`` is the length of the training and ``N`` is the reservoir size. **NLAT1** + ```math \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j} \times \textbf{x}_{i,j} \ \ \text{if \textit{j} is odd} \\ \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j} \ \ \text{if \textit{j} is even} ``` **NLAT2** + ```math \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j-1} \times \textbf{x}_{i,j-2} \ \ \text{if \textit{j} > 1 is odd} \\ \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j} \ \ \text{if \textit{j} is 1 or even} ``` **NLAT3** + ```math \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j-1} \times \textbf{x}_{i,j+1} \ \ \text{if \textit{j} > 1 is odd} \\ \tilde{\textbf{x}}_{i,j} = \textbf{x}_{i,j} \ \ \text{if \textit{j} is 1 or even} ``` [^1]: Pathak, Jaideep, et al. "_Using machine learning to replicate chaotic attractors and calculate Lyapunov exponents from data._" Chaos: An Interdisciplinary Journal of Nonlinear Science 27.12 (2017): 121102. - [^2]: Chattopadhyay, Ashesh, Pedram Hassanzadeh, and Devika Subramanian. 
"_Data-driven predictions of a multiscale Lorenz 96 chaotic system using machine-learning methods: reservoir computing, artificial neural network, and long short-term memory network._" Nonlinear Processes in Geophysics 27.3 (2020): 373-389. diff --git a/docs/src/index.md b/docs/src/index.md index ed7fa8a3..ac8e1f66 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -3,16 +3,20 @@ ReservoirComputing.jl is a versatile and user-friendly Julia package designed for the implementation of advanced Reservoir Computing models, such as Echo State Networks (ESNs). Central to Reservoir Computing is the expansion of input data into a higher-dimensional space, leveraging regression techniques for effective model training. This approach bears resemblance to kernel methods, offering a unique perspective in machine learning. ReservoirComputing.jl offers a modular design, ensuring both ease of use for newcomers and flexibility for advanced users, establishing it as a key tool for innovative computing solutions. !!! info "Introductory material" - This library assumes some basic knowledge of Reservoir Computing. For a good introduction, we suggest the following papers: the first two are the seminal papers about ESN and LSM, the others are in-depth review papers that should cover all the needed information. For the majority of the algorithms implemented in this library we cited in the documentation the original work introducing them. If you ever are in doubt about a method or a function just type ```? function``` in the Julia REPL to read the relevant notes. - - * Jaeger, Herbert: The “echo state” approach to analyzing and training recurrent neural networks-with an erratum note. - * Maass W, Natschläger T, Markram H: Real-time computing without stable states: a new framework for neural computation based on perturbations. - * Lukoševičius, Mantas: A practical guide to applying echo state networks." Neural networks: Tricks of the trade. - * Lukoševičius, Mantas, and Herbert Jaeger: Reservoir computing approaches to recurrent neural network training. + This library assumes some basic knowledge of Reservoir Computing. For a good introduction, we suggest the following papers: the first two are the seminal papers about ESN and LSM, the others are in-depth review papers that should cover all the needed information. For the majority of the algorithms implemented in this library we cited in the documentation the original work introducing them. If you ever are in doubt about a method or a function just type `? function` in the Julia REPL to read the relevant notes. + + - Jaeger, Herbert: The “echo state” approach to analyzing and training recurrent neural networks-with an erratum note. + - Maass W, Natschläger T, Markram H: Real-time computing without stable states: a new framework for neural computation based on perturbations. + - Lukoševičius, Mantas: A practical guide to applying echo state networks." Neural networks: Tricks of the trade. + - Lukoševičius, Mantas, and Herbert Jaeger: Reservoir computing approaches to recurrent neural network training. + !!! info "Performance tip" + For faster computations on the CPU it is suggested to add `using MKL` to the script. For clarity's sake this library will not be indicated under every example in the documentation. + ## Installation + To install ReservoirComputing.jl, ensure you have Julia version 1.6 or higher. Follow these steps: 1. Open the Julia command line. @@ -20,22 +24,25 @@ To install ReservoirComputing.jl, ensure you have Julia version 1.6 or higher. F 3. 
Type add ReservoirComputing and press Enter. For a more customized installation or to contribute to the package, consider cloning the repository: + ```julia using Pkg Pkg.clone("https://github.com/SciML/ReservoirComputing.jl.git") ``` + or `dev` the package. ## Features Overview -- **Multiple Training Algorithms**: Supports Ridge Regression, Linear Models, and LIBSVM regression methods for Reservoir Computing models. -- **Diverse Prediction Methods**: Offers both generative and predictive methods for Reservoir Computing predictions. -- **Modifiable Training and Prediction**: Allows modifications in Reservoir Computing states, such as state extension, padding, and combination methods. -- **Non-linear Algorithm Options**: Includes options for non-linear modifications in algorithms. -- **Echo State Networks (ESNs)**: Features various input layers, reservoirs, and methods for driving ESN reservoir states. -- **Cellular Automata-Based Reservoir Computing**: Introduces models based on one-dimensional Cellular Automata for Reservoir Computing. + - **Multiple Training Algorithms**: Supports Ridge Regression, Linear Models, and LIBSVM regression methods for Reservoir Computing models. + - **Diverse Prediction Methods**: Offers both generative and predictive methods for Reservoir Computing predictions. + - **Modifiable Training and Prediction**: Allows modifications in Reservoir Computing states, such as state extension, padding, and combination methods. + - **Non-linear Algorithm Options**: Includes options for non-linear modifications in algorithms. + - **Echo State Networks (ESNs)**: Features various input layers, reservoirs, and methods for driving ESN reservoir states. + - **Cellular Automata-Based Reservoir Computing**: Introduces models based on one-dimensional Cellular Automata for Reservoir Computing. ## Contributing + Contributions to ReservoirComputing.jl are highly encouraged and appreciated. Whether it's through implementing new RC model variations, enhancing documentation, adding examples, or any improvement, your contribution is valuable. We welcome posts of relevant papers or ideas in the issues section. For deeper insights into the library's functionality, the API section in the documentation is a great resource. For any queries not suited for issues, please reach out to the lead developers via Slack or email. ## Citing @@ -56,36 +63,46 @@ If you use ReservoirComputing.jl in your work, we kindly ask you to cite it. Her ``` ## Reproducibility + ```@raw html
The documentation of this SciML package was built using these direct dependencies, ``` + ```@example using Pkg # hide Pkg.status() # hide ``` + ```@raw html
``` + ```@raw html
and using this machine and Julia version. ``` + ```@example using InteractiveUtils # hide versioninfo() # hide ``` + ```@raw html
``` + ```@raw html
A more complete overview of all dependencies and their versions is also provided. ``` + ```@example using Pkg # hide -Pkg.status(;mode = PKGMODE_MANIFEST) # hide +Pkg.status(; mode = PKGMODE_MANIFEST) # hide ``` + ```@raw html
``` + ```@eval using TOML using Markdown diff --git a/docs/src/reca_tutorials/reca.md b/docs/src/reca_tutorials/reca.md index 68410541..65c88044 100644 --- a/docs/src/reca_tutorials/reca.md +++ b/docs/src/reca_tutorials/reca.md @@ -5,7 +5,9 @@ Reservoir Computing based on Elementary Cellular Automata (ECA) has been recentl To showcase how to use these models, this page illustrates the performance of ReCA in the 5 bit memory task [^4]. The script for the example and companion data can be found [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/tree/main/reca). ## 5 bit memory task + The data can be read as follows: + ```@example reca using DelimitedFiles @@ -14,6 +16,7 @@ output = readdlm("./5bitoutput.txt", ',', Float32) ``` To use a ReCA model, it is necessary to define the rule one intends to use. To do so, ReservoirComputing.jl leverages [CellularAutomata.jl](https://github.com/MartinuzziFrancesco/CellularAutomata.jl) that needs to be called as well to define the `RECA` struct: + ```@example reca using ReservoirComputing, CellularAutomata @@ -21,18 +24,21 @@ ca = DCA(90) ``` To define the ReCA model, it suffices to call: + ```@example reca -reca = RECA(input, ca; +reca = RECA(input, ca; generations = 16, input_encoding = RandomMapping(16, 40)) ``` -After this, the training can be performed with the chosen method. +After this, the training can be performed with the chosen method. + ```@example reca output_layer = train(reca, output, StandardRidge(0.00001)) ``` The prediction in this case will be a `Predictive()` with the input data equal to the training data. In addition, to test the 5 bit memory task, a conversion from Float to Bool is necessary (at the moment, we are aware of a bug that doesn't allow boolean input data to the RECA models): + ```@example reca prediction = reca(Predictive(input), output_layer) final_pred = convert(AbstractArray{Float32}, prediction .> 0.5) @@ -41,9 +47,6 @@ final_pred == output ``` [^1]: Yilmaz, Ozgur. "Reservoir computing using cellular automata." arXiv preprint arXiv:1410.0162 (2014). - [^2]: Margem, Mrwan, and Ozgür Yilmaz. "An experimental study on cellular automata reservoir in pathological sequence learning tasks." (2017). - [^3]: Nichele, Stefano, and Andreas Molund. "Deep reservoir computing using cellular automata." arXiv preprint arXiv:1703.02806 (2017). - [^4]: Hochreiter, Sepp, and Jürgen Schmidhuber. "Long short-term memory." Neural computation 9.8 (1997): 1735-1780. 
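As a complement to the exact-match check `final_pred == output` in the ReCA tutorial above, a per-entry accuracy can give a more granular picture; below is a short sketch reusing the tutorial's variables, assuming they are defined as shown there.

```julia
# fraction of correctly reproduced entries in the 5 bit memory task
accuracy = sum(final_pred .== output) / length(output)
```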
diff --git a/src/esn/echostatenetwork.jl b/src/esn/echostatenetwork.jl index 2fd774db..42fab481 100644 --- a/src/esn/echostatenetwork.jl +++ b/src/esn/echostatenetwork.jl @@ -91,15 +91,15 @@ esn = ESN(train_data, reservoir=RandSparseReservoir(200), washout=10) ``` """ function ESN(train_data; - variation = Default(), - input_layer = DenseLayer(), - reservoir = RandSparseReservoir(100), - bias = NullLayer(), - reservoir_driver = RNN(), - nla_type = NLADefault(), - states_type = StandardStates(), - washout = 0, - matrix_type = typeof(train_data)) + variation = Default(), + input_layer = DenseLayer(), + reservoir = RandSparseReservoir(100), + bias = NullLayer(), + reservoir_driver = RNN(), + nla_type = NLADefault(), + states_type = StandardStates(), + washout = 0, + matrix_type = typeof(train_data)) if variation isa Hybrid train_data = vcat(train_data, variation.model_data[:, 1:(end - 1)]) end @@ -107,19 +107,19 @@ function ESN(train_data; if states_type isa AbstractPaddedStates in_size = size(train_data, 1) + 1 train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))), - train_data) + train_data) else in_size = size(train_data, 1) end input_matrix, reservoir_matrix, bias_vector, res_size = obtain_layers(in_size, - input_layer, - reservoir, bias; - matrix_type = matrix_type) + input_layer, + reservoir, bias; + matrix_type = matrix_type) inner_res_driver = reservoir_driver_params(reservoir_driver, res_size, in_size) states = create_states(inner_res_driver, train_data, washout, reservoir_matrix, - input_matrix, bias_vector) + input_matrix, bias_vector) train_data = train_data[:, (washout + 1):end] ESN(sum(res_size), train_data, variation, nla_type, input_matrix, @@ -129,13 +129,13 @@ end #shallow esn construction function obtain_layers(in_size, - input_layer, - reservoir, - bias; - matrix_type = Matrix{Float64}) + input_layer, + reservoir, + bias; + matrix_type = Matrix{Float64}) input_res_size = get_ressize(reservoir) input_matrix = create_layer(input_layer, input_res_size, in_size, - matrix_type = matrix_type) + matrix_type = matrix_type) res_size = size(input_matrix, 1) #WeightedInput actually changes the res size reservoir_matrix = create_reservoir(reservoir, res_size, matrix_type = matrix_type) @assert size(reservoir_matrix, 1) == res_size @@ -147,10 +147,10 @@ end #there is a bug going on with WeightedLayer in this construction. 
#it works for eny other though function obtain_layers(in_size, - input_layer, - reservoir::Vector, - bias; - matrix_type = Matrix{Float64}) + input_layer, + reservoir::Vector, + bias; + matrix_type = Matrix{Float64}) esn_depth = length(reservoir) input_res_sizes = [get_ressize(reservoir[i]) for i in 1:esn_depth] in_sizes = zeros(Int, esn_depth) @@ -159,16 +159,16 @@ function obtain_layers(in_size, if input_layer isa Array input_matrix = [create_layer(input_layer[j], input_res_sizes[j], in_sizes[j], - matrix_type = matrix_type) for j in 1:esn_depth] + matrix_type = matrix_type) for j in 1:esn_depth] else _input_layer = fill(input_layer, esn_depth) input_matrix = [create_layer(_input_layer[k], input_res_sizes[k], in_sizes[k], - matrix_type = matrix_type) for k in 1:esn_depth] + matrix_type = matrix_type) for k in 1:esn_depth] end res_sizes = [get_ressize(input_matrix[j]) for j in 1:esn_depth] reservoir_matrix = [create_reservoir(reservoir[k], res_sizes[k], - matrix_type = matrix_type) for k in 1:esn_depth] + matrix_type = matrix_type) for k in 1:esn_depth] if bias isa Array bias_vector = [create_layer(bias[j], res_sizes[j], 1, matrix_type = matrix_type) @@ -183,9 +183,9 @@ function obtain_layers(in_size, end function (esn::ESN)(prediction::AbstractPrediction, - output_layer::AbstractOutputLayer; - last_state = esn.states[:, [end]], - kwargs...) + output_layer::AbstractOutputLayer; + last_state = esn.states[:, [end]], + kwargs...) variation = esn.variation pred_len = prediction.prediction_len @@ -197,11 +197,11 @@ function (esn::ESN)(prediction::AbstractPrediction, u0 = variation.model_data[:, end] model_pred_data = model(u0, tspan_new, predict_tsteps)[:, 2:end] return obtain_esn_prediction(esn, prediction, last_state, output_layer, - model_pred_data; - kwargs...) + model_pred_data; + kwargs...) else return obtain_esn_prediction(esn, prediction, last_state, output_layer; - kwargs...) + kwargs...) end end @@ -245,8 +245,8 @@ trained_esn = train(esn, target_data, training_method=StandardRidge(1.0)) and performs the actual training using the specified `training_method`. """ function train(esn::AbstractEchoStateNetwork, - target_data, - training_method = StandardRidge(0.0)) + target_data, + training_method = StandardRidge(0.0)) variation = esn.variation if esn.variation isa Hybrid diff --git a/src/esn/esn_input_layers.jl b/src/esn/esn_input_layers.jl index 8347a4bc..e7bb950c 100644 --- a/src/esn/esn_input_layers.jl +++ b/src/esn/esn_input_layers.jl @@ -29,9 +29,9 @@ function WeightedLayer(; scaling = 0.1) end function create_layer(input_layer::WeightedLayer, - approx_res_size, - in_size; - matrix_type = Matrix{Float64}) + approx_res_size, + in_size; + matrix_type = Matrix{Float64}) scaling = input_layer.scaling res_size = Int(floor(approx_res_size / in_size) * in_size) layer_matrix = zeros(res_size, in_size) @@ -39,7 +39,7 @@ function create_layer(input_layer::WeightedLayer, for i in 1:in_size layer_matrix[((i - 1) * q + 1):((i) * q), i] = rand(Uniform(-scaling, scaling), 1, - q) + q) end return Adapt.adapt(matrix_type, layer_matrix) @@ -86,9 +86,9 @@ Generates a matrix layer of size `res_size` x `in_size`, constructed according t - A matrix representing the constructed layer. 
""" function create_layer(input_layer::DenseLayer, - res_size, - in_size; - matrix_type = Matrix{Float64}) + res_size, + in_size; + matrix_type = Matrix{Float64}) scaling = input_layer.scaling layer_matrix = rand(Uniform(-scaling, scaling), res_size, in_size) return Adapt.adapt(matrix_type, layer_matrix) @@ -124,9 +124,9 @@ function SparseLayer(scaling_arg; scaling = scaling_arg, sparsity = 0.1) end function create_layer(input_layer::SparseLayer, - res_size, - in_size; - matrix_type = Matrix{Float64}) + res_size, + in_size; + matrix_type = Matrix{Float64}) layer_matrix = Matrix(sprand(res_size, in_size, input_layer.sparsity)) layer_matrix = 2.0 .* (layer_matrix .- 0.5) replace!(layer_matrix, -1.0 => 0.0) @@ -234,9 +234,9 @@ function MinimumLayer(; weight = 0.1, sampling = BernoulliSample(0.5)) end function create_layer(input_layer::MinimumLayer, - res_size, - in_size; - matrix_type = Matrix{Float64}) + res_size, + in_size; + matrix_type = Matrix{Float64}) sampling = input_layer.sampling weight = input_layer.weight layer_matrix = create_minimum_input(sampling, res_size, in_size, weight) @@ -315,9 +315,9 @@ function InformedLayer(model_in_size; scaling = 0.1, gamma = 0.5) end function create_layer(input_layer::InformedLayer, - res_size, - in_size; - matrix_type = Matrix{Float64}) + res_size, + in_size; + matrix_type = Matrix{Float64}) scaling = input_layer.scaling state_size = in_size - input_layer.model_in_size @@ -364,8 +364,8 @@ Creates a `NullLayer` initializer for Echo State Networks (ESNs) that generates struct NullLayer <: AbstractLayer end function create_layer(input_layer::NullLayer, - res_size, - in_size; - matrix_type = Matrix{Float64}) + res_size, + in_size; + matrix_type = Matrix{Float64}) return Adapt.adapt(matrix_type, zeros(res_size, in_size)) end diff --git a/src/esn/esn_predict.jl b/src/esn/esn_predict.jl index 73cddd38..daa6fc34 100644 --- a/src/esn/esn_predict.jl +++ b/src/esn/esn_predict.jl @@ -1,10 +1,10 @@ function obtain_esn_prediction(esn, - prediction::Generative, - x, - output_layer, - args...; - initial_conditions = output_layer.last_value, - save_states = false) + prediction::Generative, + x, + output_layer, + args...; + initial_conditions = output_layer.last_value, + save_states = false) out_size = output_layer.out_size training_method = output_layer.training_method prediction_len = prediction.prediction_len @@ -19,7 +19,7 @@ function obtain_esn_prediction(esn, for i in 1:prediction_len x, x_new = next_state_prediction!(esn, x, x_new, out, out_pad, i, tmp_array, - args...) + args...) out_tmp = get_prediction(output_layer.training_method, output_layer, x_new) out = store_results!(output_layer.training_method, out_tmp, output, i) states[:, i] = x @@ -29,12 +29,12 @@ function obtain_esn_prediction(esn, end function obtain_esn_prediction(esn, - prediction::Predictive, - x, - output_layer, - args...; - initial_conditions = output_layer.last_value, - save_states = false) + prediction::Predictive, + x, + output_layer, + args...; + initial_conditions = output_layer.last_value, + save_states = false) out_size = output_layer.out_size training_method = output_layer.training_method prediction_len = prediction.prediction_len @@ -49,7 +49,7 @@ function obtain_esn_prediction(esn, for i in 1:prediction_len x, x_new = next_state_prediction!(esn, x, x_new, prediction.prediction_data[:, i], - out_pad, i, tmp_array, args...) + out_pad, i, tmp_array, args...) 
out_tmp = get_prediction(training_method, output_layer, x_new) out = store_results!(training_method, out_tmp, output, i) states[:, i] = x @@ -61,40 +61,40 @@ end #prediction dispatch on esn function next_state_prediction!(esn::ESN, x, x_new, out, out_pad, i, tmp_array, args...) return _variation_prediction!(esn.variation, esn, x, x_new, out, out_pad, i, tmp_array, - args...) + args...) end #dispatch the prediction on the esn variation function _variation_prediction!(variation, - esn, - x, - x_new, - out, - out_pad, - i, - tmp_array, - args...) + esn, + x, + x_new, + out, + out_pad, + i, + tmp_array, + args...) out_pad = pad_state!(esn.states_type, out_pad, out) xv = @view x[1:(esn.res_size)] x = next_state!(x, esn.reservoir_driver, xv, out_pad, - esn.reservoir_matrix, esn.input_matrix, esn.bias_vector, tmp_array) + esn.reservoir_matrix, esn.input_matrix, esn.bias_vector, tmp_array) x_new = esn.states_type(esn.nla_type, x, out_pad) return x, x_new end function _variation_prediction!(variation::Hybrid, - esn, - x, - x_new, - out, - out_pad, - i, - tmp_array, - model_prediction_data) + esn, + x, + x_new, + out, + out_pad, + i, + tmp_array, + model_prediction_data) out_tmp = vcat(out, model_prediction_data[:, i]) out_pad = pad_state!(esn.states_type, out_pad, out_tmp) x = next_state!(x, esn.reservoir_driver, x[1:(esn.res_size)], out_pad, - esn.reservoir_matrix, esn.input_matrix, esn.bias_vector, tmp_array) + esn.reservoir_matrix, esn.input_matrix, esn.bias_vector, tmp_array) x_tmp = vcat(x, model_prediction_data[:, i]) x_new = esn.states_type(esn.nla_type, x_tmp, out_pad) return x, x_new diff --git a/src/esn/esn_reservoir_drivers.jl b/src/esn/esn_reservoir_drivers.jl index cebbc915..6ab198d7 100644 --- a/src/esn/esn_reservoir_drivers.jl +++ b/src/esn/esn_reservoir_drivers.jl @@ -26,11 +26,11 @@ Create and return the trained Echo State Network (ESN) states according to the s This function is responsible for creating and returning the states of the ESN during training based on the provided training data and parameters. 
""" function create_states(reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix, - input_matrix, - bias_vector) + train_data, + washout, + reservoir_matrix, + input_matrix, + bias_vector) train_len = size(train_data, 2) - washout res_size = size(reservoir_matrix, 1) @@ -41,13 +41,13 @@ function create_states(reservoir_driver::AbstractReservoirDriver, for i in 1:washout yv = @view train_data[:, i] _state = next_state!(_state, reservoir_driver, _state, yv, reservoir_matrix, - input_matrix, bias_vector, tmp_array) + input_matrix, bias_vector, tmp_array) end for j in 1:train_len yv = @view train_data[:, washout + j] _state = next_state!(_state, reservoir_driver, _state, yv, - reservoir_matrix, input_matrix, bias_vector, tmp_array) + reservoir_matrix, input_matrix, bias_vector, tmp_array) states[:, j] = _state end @@ -55,11 +55,11 @@ function create_states(reservoir_driver::AbstractReservoirDriver, end function create_states(reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix::Vector, - input_matrix, - bias_vector) + train_data, + washout, + reservoir_matrix::Vector, + input_matrix, + bias_vector) train_len = size(train_data, 2) - washout res_size = sum([size(reservoir_matrix[i], 1) for i in 1:length(reservoir_matrix)]) @@ -70,17 +70,17 @@ function create_states(reservoir_driver::AbstractReservoirDriver, for i in 1:washout for j in 1:length(reservoir_matrix) _inter_state = next_state!(_inter_state, reservoir_driver, _inter_state, - train_data[:, i], - reservoir_matrix, input_matrix, bias_vector, - tmp_array) + train_data[:, i], + reservoir_matrix, input_matrix, bias_vector, + tmp_array) end _state = next_state!(_state, reservoir_driver, _state, train_data[:, i], - reservoir_matrix, input_matrix, bias_vector, tmp_array) + reservoir_matrix, input_matrix, bias_vector, tmp_array) end for j in 1:train_len _state = next_state!(_state, reservoir_driver, _state, train_data[:, washout + j], - reservoir_matrix, input_matrix, bias_vector, tmp_array) + reservoir_matrix, input_matrix, bias_vector, tmp_array) states[:, j] = _state end @@ -180,10 +180,10 @@ This function creates an MRNN object with the specified activation functions, le time-series prediction._" Neurocomputing 159 (2015): 58-66. """ function MRNN( - ; - activation_function = [tanh, sigmoid], - leaky_coefficient = 1.0, - scaling_factor = fill(leaky_coefficient, length(activation_function))) + ; + activation_function = [tanh, sigmoid], + leaky_coefficient = 1.0, + scaling_factor = fill(leaky_coefficient, length(activation_function))) @assert length(activation_function) == length(scaling_factor) return MRNN(activation_function, leaky_coefficient, scaling_factor) end @@ -283,12 +283,12 @@ A GRUParams object containing the parameters needed for the GRU-based reservoir arXiv preprint arXiv:1406.1078 (2014). 
""" function GRU( - ; - activation_function = [NNlib.sigmoid, NNlib.sigmoid, tanh], - inner_layer = fill(DenseLayer(), 2), - reservoir = fill(RandSparseReservoir(0), 2), - bias = fill(DenseLayer(), 2), - variant = FullyGated()) + ; + activation_function = [NNlib.sigmoid, NNlib.sigmoid, tanh], + inner_layer = fill(DenseLayer(), 2), + reservoir = fill(RandSparseReservoir(0), 2), + bias = fill(DenseLayer(), 2), + variant = FullyGated()) return GRU(activation_function, inner_layer, reservoir, bias, variant) end diff --git a/src/esn/esn_reservoirs.jl b/src/esn/esn_reservoirs.jl index c4c3a86f..e014e4e7 100644 --- a/src/esn/esn_reservoirs.jl +++ b/src/esn/esn_reservoirs.jl @@ -54,8 +54,8 @@ A matrix representing the reservoir, generated based on the properties of the sp The choice of reservoir initialization is crucial in Echo State Networks (ESNs) for achieving effective temporal modeling. Specific references for reservoir initialization methods may vary based on the type of reservoir used, but the practice of initializing reservoirs for ESNs is widely documented in the ESN literature. """ function create_reservoir(reservoir::RandSparseReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) reservoir_matrix = Matrix(sprand(res_size, res_size, reservoir.sparsity)) reservoir_matrix = 2.0 .* (reservoir_matrix .- 0.5) replace!(reservoir_matrix, -1.0 => 0.0) @@ -93,10 +93,10 @@ struct PseudoSVDReservoir{T, C} <: AbstractReservoir end function PseudoSVDReservoir(res_size; - max_value = 1.0, - sparsity = 0.1, - sorted = true, - reverse_sort = false) + max_value = 1.0, + sparsity = 0.1, + sorted = true, + reverse_sort = false) return PseudoSVDReservoir(res_size, max_value, sparsity, sorted, reverse_sort) end @@ -122,22 +122,22 @@ This reservoir initialization method, based on a pseudo-SVD approach, is inspire [^yang]: Yang, Cuili, et al. "_Design of polynomial echo state networks for time series prediction._" Neurocomputing 290 (2018): 148-160. 
""" function PseudoSVDReservoir(res_size, max_value, sparsity; sorted = true, - reverse_sort = false) + reverse_sort = false) return PseudoSVDReservoir(res_size, max_value, sparsity, sorted, reverse_sort) end function create_reservoir(reservoir::PseudoSVDReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) sorted = reservoir.sorted reverse_sort = reservoir.reverse_sort reservoir_matrix = create_diag(res_size, reservoir.max_value, sorted = sorted, - reverse_sort = reverse_sort) + reverse_sort = reverse_sort) tmp_sparsity = get_sparsity(reservoir_matrix, res_size) while tmp_sparsity <= reservoir.sparsity reservoir_matrix *= create_qmatrix(res_size, rand(1:res_size), rand(1:res_size), - rand() * 2 - 1) + rand() * 2 - 1) tmp_sparsity = get_sparsity(reservoir_matrix, res_size) end @@ -214,8 +214,8 @@ function DelayLineReservoir(res_size; weight = 0.1) end function create_reservoir(reservoir::DelayLineReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) reservoir_matrix = zeros(res_size, res_size) for i in 1:(res_size - 1) @@ -258,8 +258,8 @@ function DelayLineBackwardReservoir(res_size; weight = 0.1, fb_weight = 0.2) end function create_reservoir(reservoir::DelayLineBackwardReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) reservoir_matrix = zeros(res_size, res_size) for i in 1:(res_size - 1) @@ -301,8 +301,8 @@ function SimpleCycleReservoir(res_size; weight = 0.1) end function create_reservoir(reservoir::SimpleCycleReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) reservoir_matrix = zeros(Float64, res_size, res_size) for i in 1:(res_size - 1) @@ -348,8 +348,8 @@ function CycleJumpsReservoir(res_size; cycle_weight = 0.1, jump_weight = 0.1, ju end function create_reservoir(reservoir::CycleJumpsReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) reservoir_matrix = zeros(res_size, res_size) for i in 1:(res_size - 1) @@ -387,7 +387,7 @@ A `NullReservoir` object. struct NullReservoir <: AbstractReservoir end function create_reservoir(reservoir::NullReservoir, - res_size; - matrix_type = Matrix{Float64}) + res_size; + matrix_type = Matrix{Float64}) return Adapt.adapt(matrix_type, zeros(res_size, res_size)) end diff --git a/src/predict.jl b/src/predict.jl index 51a23d53..5ec715f2 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -1,9 +1,9 @@ function obtain_prediction(rc::AbstractReservoirComputer, - prediction::Generative, - x, - output_layer, - args...; - initial_conditions = output_layer.last_value) + prediction::Generative, + x, + output_layer, + args...; + initial_conditions = output_layer.last_value) #x = last_state prediction_len = prediction.prediction_len train_method = output_layer.training_method @@ -21,11 +21,11 @@ function obtain_prediction(rc::AbstractReservoirComputer, end function obtain_prediction(rc::AbstractReservoirComputer, - prediction::Predictive, - x, - output_layer, - args...; - kwargs...) + prediction::Predictive, + x, + output_layer, + args...; + kwargs...) prediction_len = prediction.prediction_len train_method = output_layer.training_method out_size = output_layer.out_size diff --git a/src/reca/reca.jl b/src/reca/reca.jl index 0a54a881..1568964f 100644 --- a/src/reca/reca.jl +++ b/src/reca/reca.jl @@ -25,11 +25,11 @@ arXiv preprint arXiv:1410.0162 (2014). automata._” arXiv preprint arXiv:1703.02806 (2017). 
""" function RECA(train_data, - automata; - generations = 8, - input_encoding = RandomMapping(), - nla_type = NLADefault(), - states_type = StandardStates()) + automata; + generations = 8, + input_encoding = RandomMapping(), + nla_type = NLADefault(), + states_type = StandardStates()) in_size = size(train_data, 1) #res_size = obtain_res_size(input_encoding, generations) state_encoding = create_encoding(input_encoding, train_data, generations) @@ -46,11 +46,11 @@ end #predict dispatch function (reca::RECA)(prediction, - output_layer::AbstractOutputLayer, - initial_conditions = output_layer.last_value, - last_state = zeros(reca.input_encoding.ca_size)) + output_layer::AbstractOutputLayer, + initial_conditions = output_layer.last_value, + last_state = zeros(reca.input_encoding.ca_size)) return obtain_prediction(reca, prediction, last_state, output_layer; - initial_conditions = initial_conditions) + initial_conditions = initial_conditions) end function next_state_prediction!(reca::RECA, x, out, i, args...) diff --git a/src/reca/reca_input_encodings.jl b/src/reca/reca_input_encodings.jl index 5a3923b6..8fdd5233 100644 --- a/src/reca/reca_input_encodings.jl +++ b/src/reca/reca_input_encodings.jl @@ -41,7 +41,7 @@ function create_encoding(rm::RandomMapping, input_data, generations) states_size = generations * rm.expansion_size * rm.permutations ca_size = rm.expansion_size * rm.permutations return RandomMaps(rm.permutations, rm.expansion_size, generations, maps, states_size, - ca_size) + ca_size) end function reca_create_states(rm::RandomMaps, automata, input_data) @@ -67,9 +67,9 @@ function encoding(rm::RandomMaps, input_vector, tot_encoded_vector) for i in 1:(rm.permutations) new_tot_enc_vec[((i - 1) * rm.expansion_size + 1):(i * rm.expansion_size)] = single_encoding(input_vector, - new_tot_enc_vec[((i - 1) * rm.expansion_size + 1):(i * rm.expansion_size)], - rm.maps[i, - :]) + new_tot_enc_vec[((i - 1) * rm.expansion_size + 1):(i * rm.expansion_size)], + rm.maps[i, + :]) end return new_tot_enc_vec diff --git a/src/states.jl b/src/states.jl index c1ef6648..32cbb443 100644 --- a/src/states.jl +++ b/src/states.jl @@ -207,4 +207,4 @@ function nla(::NLAT3, x_old) end return x_new -end \ No newline at end of file +end diff --git a/src/train/linear_regression.jl b/src/train/linear_regression.jl index 1b7946dc..b1fc2c22 100644 --- a/src/train/linear_regression.jl +++ b/src/train/linear_regression.jl @@ -41,15 +41,15 @@ apart from the solver choice. MLJLinearModels.jl needs to be called in order to use these models. """ function LinearModel( - ; regression = LinearRegression, - solver = Analytical(), - regression_kwargs = (;)) + ; regression = LinearRegression, + solver = Analytical(), + regression_kwargs = (;)) return LinearModel(regression, solver, regression_kwargs) end function LinearModel(regression; - solver = Analytical(), - regression_kwargs = (;)) + solver = Analytical(), + regression_kwargs = (;)) return LinearModel(regression, solver, regression_kwargs) end @@ -59,7 +59,7 @@ function _train(states, target_data, linear::LinearModel) for i in 1:size(target_data, 1) regressor = linear.regression(; fit_intercept = false, linear.regression_kwargs...) 
output_layer[i, :] = MLJLinearModels.fit(regressor, states', - target_data[i, :], solver = linear.solver) + target_data[i, :], solver = linear.solver) end return OutputLayer(linear, output_layer, out_size, target_data[:, end]) diff --git a/test/esn/test_drivers.jl b/test/esn/test_drivers.jl index 1739c836..db1c7b01 100644 --- a/test/esn/test_drivers.jl +++ b/test/esn/test_drivers.jl @@ -12,23 +12,23 @@ const training_method = StandardRidge(10e-6) Random.seed!(77) esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1), - reservoir_driver = GRU(variant = FullyGated(), - reservoir = [ - RandSparseReservoir(res_size, 1.0, 0.5), - RandSparseReservoir(res_size, 1.2, 0.1), - ])) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1), + reservoir_driver = GRU(variant = FullyGated(), + reservoir = [ + RandSparseReservoir(res_size, 1.0, 0.5), + RandSparseReservoir(res_size, 1.2, 0.1), + ])) output_layer = train(esn, target_data, training_method) output = esn(Predictive(target_data), output_layer, initial_conditions = target_data[1]) @test mean(abs.(target_data .- output)) ./ mean(abs.(target_data)) < 0.11 esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1), - reservoir_driver = GRU(variant = Minimal(), - reservoir = RandSparseReservoir(res_size, 1.0, 0.5), - inner_layer = DenseLayer(), - bias = DenseLayer())) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1), + reservoir_driver = GRU(variant = Minimal(), + reservoir = RandSparseReservoir(res_size, 1.0, 0.5), + inner_layer = DenseLayer(), + bias = DenseLayer())) output_layer = train(esn, target_data, training_method) output = esn(Predictive(target_data), output_layer, initial_conditions = target_data[1]) @@ -36,30 +36,30 @@ output = esn(Predictive(target_data), output_layer, initial_conditions = target_ #multiple rnn esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1), - reservoir_driver = MRNN(activation_function = (tanh, sigmoid), - scaling_factor = (0.8, 0.1))) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1), + reservoir_driver = MRNN(activation_function = (tanh, sigmoid), + scaling_factor = (0.8, 0.1))) output_layer = train(esn, target_data, training_method) output = esn(Predictive(target_data), output_layer, initial_conditions = target_data[1]) @test mean(abs.(target_data .- output)) ./ mean(abs.(target_data)) < 0.11 #deep esn esn = ESN(input_data; - reservoir = [ - RandSparseReservoir(res_size, 1.2, 0.1), - RandSparseReservoir(res_size, 1.2, 0.1), - ]) + reservoir = [ + RandSparseReservoir(res_size, 1.2, 0.1), + RandSparseReservoir(res_size, 1.2, 0.1), + ]) output_layer = train(esn, target_data, training_method) output = esn(Predictive(target_data), output_layer, initial_conditions = target_data[1]) @test mean(abs.(target_data .- output)) ./ mean(abs.(target_data)) < 0.11 esn = ESN(input_data; - reservoir = [ - RandSparseReservoir(res_size, 1.2, 0.1), - RandSparseReservoir(res_size, 1.2, 0.1), - ], - input_layer = [DenseLayer(), DenseLayer()], - bias = [NullLayer(), NullLayer()]) + reservoir = [ + RandSparseReservoir(res_size, 1.2, 0.1), + RandSparseReservoir(res_size, 1.2, 0.1), + ], + input_layer = [DenseLayer(), DenseLayer()], + bias = [NullLayer(), NullLayer()]) output_layer = train(esn, target_data, training_method) output = esn(Predictive(target_data), output_layer, initial_conditions = target_data[1]) @test mean(abs.(target_data .- output)) ./ mean(abs.(target_data)) < 0.11 diff --git a/test/esn/test_hybrid.jl b/test/esn/test_hybrid.jl index a28a0981..4f858208 
100644 --- a/test/esn/test_hybrid.jl +++ b/test/esn/test_hybrid.jl @@ -34,8 +34,8 @@ hybrid = Hybrid(prior_model_data_generator, u0, tspan_train, train_len) Random.seed!(77) esn = ESN(input_data, - reservoir = RandSparseReservoir(300), - variation = hybrid) + reservoir = RandSparseReservoir(300), + variation = hybrid) output_layer = train(esn, target_data, StandardRidge(0.3)) diff --git a/test/esn/test_nla.jl b/test/esn/test_nla.jl index 2b0a7327..b1b0c0cb 100644 --- a/test/esn/test_nla.jl +++ b/test/esn/test_nla.jl @@ -15,8 +15,8 @@ nlas = [NLADefault(), NLAT1(), NLAT2(), NLAT3()] for n in nlas Random.seed!(77) esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1), - nla_type = n) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1), + nla_type = n) output_layer = train(esn, target_data, training_method) output = esn(Generative(predict_len), output_layer) @test maximum(abs.(test .- output)) ./ maximum(abs.(test)) < 0.1 diff --git a/test/esn/test_reservoirs.jl b/test/esn/test_reservoirs.jl index 8650c1f7..ac751712 100644 --- a/test/esn/test_reservoirs.jl +++ b/test/esn/test_reservoirs.jl @@ -22,7 +22,7 @@ reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test maximum(reservoir_matrix) <= radius reservoir_constructor = PseudoSVDReservoir(res_size, max_value = radius, - sparsity = sparsity) + sparsity = sparsity) reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test size(reservoir_matrix) == (res_size, res_size) @test maximum(reservoir_matrix) <= radius @@ -45,7 +45,7 @@ reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test maximum(reservoir_matrix) == weight reservoir_constructor = DelayLineBackwardReservoir(res_size, weight = weight, - fb_weight = weight) + fb_weight = weight) reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test size(reservoir_matrix) == (res_size, res_size) @test maximum(reservoir_matrix) == weight @@ -68,7 +68,7 @@ reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test maximum(reservoir_matrix) == weight reservoir_constructor = CycleJumpsReservoir(res_size, cycle_weight = weight, - jump_weight = weight, jump_size = jump_size) + jump_weight = weight, jump_size = jump_size) reservoir_matrix = create_reservoir(reservoir_constructor, res_size) @test size(reservoir_matrix) == (res_size, res_size) @test maximum(reservoir_matrix) == weight diff --git a/test/esn/test_states.jl b/test/esn/test_states.jl index 1b2b8d71..a8d80cb0 100644 --- a/test/esn/test_states.jl +++ b/test/esn/test_states.jl @@ -15,7 +15,7 @@ states_types = [StandardStates, ExtendedStates, PaddedStates, PaddedExtendedStat for t in states_types Random.seed!(77) esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1)) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1)) output_layer = train(esn, target_data, training_method) output = esn(Generative(predict_len), output_layer) @test maximum(abs.(test_data .- output)) ./ maximum(abs.(test_data)) < 0.1 diff --git a/test/esn/test_train.jl b/test/esn/test_train.jl index 731ff773..dc0205d4 100644 --- a/test/esn/test_train.jl +++ b/test/esn/test_train.jl @@ -12,7 +12,7 @@ const reg = 10e-6 Random.seed!(77) esn = ESN(input_data; - reservoir = RandSparseReservoir(res_size, 1.2, 0.1)) + reservoir = RandSparseReservoir(res_size, 1.2, 0.1)) training_methods = [ StandardRidge(regularization_coeff = reg), diff --git a/test/reca/test_predictive.jl b/test/reca/test_predictive.jl index c5ca95d4..1fb682c9 100644 --- 
a/test/reca/test_predictive.jl +++ b/test/reca/test_predictive.jl @@ -6,8 +6,8 @@ const g = 6 const rule = 90 reca = RECA(input, DCA(rule); - generations = g, - input_encoding = RandomMapping(6, 10)) + generations = g, + input_encoding = RandomMapping(6, 10)) output_layer = train(reca, output, StandardRidge(0.001)) prediction = reca(Predictive(input), output_layer) diff --git a/test/runtests.jl b/test/runtests.jl index 9b5d73f9..77d000f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,20 +2,40 @@ using SafeTestsets using Test @testset "Common Utilities " begin - @safetestset "Quality Assurance" begin include("qa.jl") end - @safetestset "States" begin include("test_states.jl") end + @safetestset "Quality Assurance" begin + include("qa.jl") + end + @safetestset "States" begin + include("test_states.jl") + end end @testset "Echo State Networks" begin - @safetestset "ESN Input Layers" begin include("esn/test_input_layers.jl") end - @safetestset "ESN Reservoirs" begin include("esn/test_reservoirs.jl") end - @safetestset "ESN States" begin include("esn/test_states.jl") end - @safetestset "ESN Train and Predict" begin include("esn/test_train.jl") end - @safetestset "ESN Drivers" begin include("esn/test_drivers.jl") end - @safetestset "ESN Non Linear Algos" begin include("esn/test_nla.jl") end - @safetestset "Hybrid ESN" begin include("esn/test_hybrid.jl") end + @safetestset "ESN Input Layers" begin + include("esn/test_input_layers.jl") + end + @safetestset "ESN Reservoirs" begin + include("esn/test_reservoirs.jl") + end + @safetestset "ESN States" begin + include("esn/test_states.jl") + end + @safetestset "ESN Train and Predict" begin + include("esn/test_train.jl") + end + @safetestset "ESN Drivers" begin + include("esn/test_drivers.jl") + end + @safetestset "ESN Non Linear Algos" begin + include("esn/test_nla.jl") + end + @safetestset "Hybrid ESN" begin + include("esn/test_hybrid.jl") + end end @testset "CA based Reservoirs" begin - @safetestset "RECA" begin include("reca/test_predictive.jl") end + @safetestset "RECA" begin + include("reca/test_predictive.jl") + end end diff --git a/test/test_states.jl b/test/test_states.jl index becb8df5..c8808bbf 100644 --- a/test/test_states.jl +++ b/test/test_states.jl @@ -38,12 +38,12 @@ padded_array = states_type(NLADefault(), test_array, extension) states_type = PaddedExtendedStates(padding = padding) padded_extended_array = states_type(NLADefault(), test_array, extension) @test padded_extended_array == reshape(vcat(padding, extension, test_array), - length(test_array) + length(extension) + 1, 1) + length(test_array) + length(extension) + 1, 1) states_type = PaddedExtendedStates(padding) padded_extended_array = states_type(NLADefault(), test_array, extension) @test padded_extended_array == reshape(vcat(padding, extension, test_array), - length(test_array) + length(extension) + 1, 1) + length(test_array) + length(extension) + 1, 1) states_type = ExtendedStates() extended_array = states_type(NLADefault(), test_array, extension)