mrnn - iim - cdrec - stmvl - linux env

eXascaleInfolab · Sep 24, 2024 · efb2c1c · efb2c1c
1 parent 7b64e2c
commit efb2c1c
Show file tree

Hide file tree

Showing 15 changed files with 306 additions and 47 deletions.
diff --git a/.github/workflows/pytest_imp_stmvl.yml b/.github/workflows/pytest_imp_stmvl.yml
@@ -0,0 +1,32 @@
+
+name: Pytest - ImputeGAP - Imputation - ST-MVL - 8
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        lfs: true
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.8'
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install libmlpack-dev
+        sudo apt-get install libopenblas-dev
+        pip install -r requirements.txt
+        pip install mypy
+        pip install pytest
+
+    - name: Run pytest
+      run: python -m pytest ./tests/test_imputation_stmvl.py
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc b/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc
diff --git a/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc b/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc
diff --git a/imputegap/algorithms/cdrec.py b/imputegap/algorithms/cdrec.py
@@ -1,51 +1,56 @@
-import numpy as np
 import ctypes
 import os
 import platform
-import os.path as __os_path_import;
 import ctypes as __native_c_types_import;
 import numpy as __numpy_import;
 
-
 def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
     __numpy_marshal = __numpy_import.array(__ctype_container).reshape(__py_sizem, __py_sizen).T;
 
     return __numpy_marshal;
 
-
 def __marshal_as_native_column(__py_matrix):
     __py_input_flat = __numpy_import.ndarray.flatten(__py_matrix.T);
     __ctype_marshal = __numpy_import.ctypeslib.as_ctypes(__py_input_flat);
 
     return __ctype_marshal;
 
-def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):
+
+def load_share_lib(name = "lib_algo"):
     """
-    Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
-    :param __py_matrix: 2D array
-    :param __py_rank: truncation rank to be used (0 = detect truncation automatically)
-    :param __py_eps: threshold for difference during recovery
-    :param __py_iters: maximum number of allowed iterations for the algorithms
-    :return: 2D array recovered matrix
+    Determine the OS and load the correct shared library
+    :param name: name of the library
+    :return: the loaded shared library (ctypes.CDLL handle)
     """
 
-    # Determine the OS and load the correct shared library
-
-    local_path_win = './algorithms/lib/lib_algo.dll'
-    local_path_lin = './algorithms/lib/lib_algo.so'
+    local_path_win = './algorithms/lib/'+name+'.dll'
+    local_path_lin = './algorithms/lib/'+name+'.so'
 
     if not os.path.exists(local_path_win):
-        local_path_win = './imputegap/algorithms/lib/lib_algo.dll'
-        local_path_lin = './imputegap/algorithms/lib/lib_algo.so'
+        local_path_win = './imputegap/algorithms/lib/'+name+'.dll'
+        local_path_lin = './imputegap/algorithms/lib/'+name+'.so'
 
     if platform.system() == 'Windows':
         lib_path = os.path.join(local_path_win)
     else:
         lib_path = os.path.join(local_path_lin)
+    print("\n", lib_path, " has been loaded...")
+
+    return ctypes.CDLL(lib_path)
 
-    print("\n", lib_path , " has been loaded...")
 
-    cdrec_lib = ctypes.CDLL(lib_path)
+
+def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
+    """
+    Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
+    :param __py_matrix: 2D array
+    :param __py_rank: truncation rank to be used (0 = detect truncation automatically)
+    :param __py_eps: threshold for difference during recovery
+    :param __py_iters: maximum number of allowed iterations for the algorithms
+    :return: 2D array recovered matrix
+    """
+
+    shared_lib = load_share_lib()
 
     __py_sizen = len(__py_matrix);
     __py_sizem = len(__py_matrix[0]);
@@ -70,7 +75,7 @@ def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):
     #         double *matrixNative, size_t dimN, size_t dimM,
     #         size_t truncation, double epsilon, size_t iters
     # )
-    cdrec_lib.cdrec_imputation_parametrized(
+    shared_lib.cdrec_imputation_parametrized(
         __ctype_input_matrix, __ctype_sizen, __ctype_sizem,
         __ctype_rank, __ctype_eps, __ctype_iters
     );
@@ -79,12 +84,11 @@ def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):
 
     return __py_recovered;
 
-def cdrec(ground_truth, contamination, truncation_rank, iterations, epsilon):
+def cdrec(contamination, truncation_rank, iterations, epsilon):
     """
     CDREC algorithm for imputation of missing data
     @author : Quentin Nater
 
-    :param ground_truth: original time series without contamination
     :param contamination: time series with contamination
     :param truncation_rank: rank of reduction of the matrix (must be higher than 1 and smaller than the limit of series)
     :param epsilon: threshold for difference during recovery
@@ -95,7 +99,7 @@ def cdrec(ground_truth, contamination, truncation_rank, iterations, epsilon):
     """
 
     # Call the C++ function to perform recovery
-    imputed_matrix = native_cdrec_param(contamination, truncation_rank, epsilon, iterations)
+    imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations)
 
     return imputed_matrix
 

diff --git a/imputegap/algorithms/stmvl.py b/imputegap/algorithms/stmvl.py
@@ -0,0 +1,109 @@
+import ctypes
+import os
+import platform
+import ctypes as __native_c_types_import;
+import numpy as __numpy_import;
+
+def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
+    __numpy_marshal = __numpy_import.array(__ctype_container).reshape(__py_sizem, __py_sizen).T;
+
+    return __numpy_marshal;
+
+def __marshal_as_native_column(__py_matrix):
+    __py_input_flat = __numpy_import.ndarray.flatten(__py_matrix.T);
+    __ctype_marshal = __numpy_import.ctypeslib.as_ctypes(__py_input_flat);
+
+    return __ctype_marshal;
+
+
+def load_share_lib(name = "lib_algo"):
+    """
+    Determine the OS and load the correct shared library
+    :param name: name of the library
+    :return: the loaded shared library (ctypes.CDLL handle)
+    """
+
+    local_path_win = './algorithms/lib/'+name+'.dll'
+    local_path_lin = './algorithms/lib/'+name+'.so'
+
+    if not os.path.exists(local_path_win):
+        local_path_win = './imputegap/algorithms/lib/'+name+'.dll'
+        local_path_lin = './imputegap/algorithms/lib/'+name+'.so'
+
+    if platform.system() == 'Windows':
+        lib_path = os.path.join(local_path_win)
+    else:
+        lib_path = os.path.join(local_path_lin)
+    print("\n", lib_path, " has been loaded...")
+
+    return ctypes.CDLL(lib_path)
+
+
+
+
+def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha):
+    # type: (__numpy_import.array, int, float, int) -> __numpy_import.array
+    """
+    Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
+    :param __py_matrix: 2D array
+    :param __py_window: window size for temporal component
+    :param __py_gamma: smoothing parameter for temporal weight
+    :param __py_alpha: power for spatial weight
+    :return: 2D array recovered matrix
+    """
+
+    shared_lib = load_share_lib()
+
+    __py_sizen = len(__py_matrix);
+    __py_sizem = len(__py_matrix[0]);
+
+    assert (__py_window >= 2);
+    assert (__py_gamma > 0.0);
+    assert (__py_gamma < 1.0);
+    assert (__py_alpha > 0.0);
+
+    __ctype_sizen = __native_c_types_import.c_ulonglong(__py_sizen);
+    __ctype_sizem = __native_c_types_import.c_ulonglong(__py_sizem);
+
+    __ctype_window = __native_c_types_import.c_ulonglong(__py_window);
+    __ctype_gamma = __native_c_types_import.c_double(__py_gamma);
+    __ctype_alpha = __native_c_types_import.c_double(__py_alpha);
+
+    # Native code uses linear matrix layout, and also it's easier to pass it in like this
+    __ctype_input_matrix = __marshal_as_native_column(__py_matrix);
+
+    # extern "C" void
+    # stmvl_imputation_parametrized(
+    #         double *matrixNative, size_t dimN, size_t dimM,
+    #         size_t window_size, double gamma, double alpha
+    # )
+    shared_lib.stmvl_imputation_parametrized(
+        __ctype_input_matrix, __ctype_sizen, __ctype_sizem,
+        __ctype_window, __ctype_gamma, __ctype_alpha
+    );
+
+    __py_recovered = __marshal_as_numpy_column(__ctype_input_matrix, __py_sizen, __py_sizem);
+
+    return __py_recovered;
+
+
+def stmvl(contamination, window_size, gamma, alpha):
+    """
+    ST-MVL algorithm for imputation of missing data
+    @author : Quentin Nater
+
+    :param contamination: time series with contamination
+    :param window_size: window size for temporal component
+    :param gamma: smoothing parameter for temporal weight
+    :param alpha: power for spatial weight
+
+    :return: imputed_matrix : all time series with imputation data
+
+    """
+
+    # Call the C++ function to perform recovery
+    imputed_matrix = native_stmvl(contamination, window_size, gamma, alpha)
+
+    return imputed_matrix
+
+
diff --git a/imputegap/assets/test_contamination.png b/imputegap/assets/test_contamination.png
diff --git a/imputegap/assets/test_ground_truth.png b/imputegap/assets/test_ground_truth.png
diff --git a/imputegap/assets/test_imputation.png b/imputegap/assets/test_imputation.png
diff --git a/imputegap/imputation/__pycache__/imputation.cpython-312.pyc b/imputegap/imputation/__pycache__/imputation.cpython-312.pyc
diff --git a/imputegap/imputation/imputation.py b/imputegap/imputation/imputation.py
@@ -4,6 +4,7 @@
 from imputegap.algorithms.iim import iim
 from imputegap.algorithms.min_impute import min_impute
 from imputegap.algorithms.mrnn import mrnn
+from imputegap.algorithms.stmvl import stmvl
 from imputegap.algorithms.zero_impute import zero_impute
 from imputegap.evaluation.evaluation import Evaluation
 
@@ -134,7 +135,7 @@ def mrnn_imputation(ground_truth, contamination, params=None):
            :return: imputed_matrix, metrics : all time series with imputation data and their metrics
            """
             if params is not None:
-                hidden_dim, learning_rate, iterations, keep_prob, sequence_length = params
+                hidden_dim, learning_rate, iterations, sequence_length = params
             else:
                 config = Imputation.load_toml()
                 hidden_dim = config['mrnn']['default_hidden_dim']
@@ -150,4 +151,36 @@ def mrnn_imputation(ground_truth, contamination, params=None):
 
             print("\nMRNN Imputation completed without error.\n")
 
+            return imputed_matrix, metrics
+
+    class Pattern:
+        def stmvl_imputation(ground_truth, contamination, params=None):
+            """
+           Imputation of data with ST-MVL algorithm
+           @author Quentin Nater
+
+           :param ground_truth: original time series without contamination
+           :param contamination: time series with contamination
+           :param params: [Optional] parameters of the algorithm, window_size, gamma, alpha, if None, default ones are loaded
+                :param window_size: window size for temporal component
+                :param gamma: smoothing parameter for temporal weight
+                :param alpha: power for spatial weight
+           :return: imputed_matrix, metrics : all time series with imputation data and their metrics
+           """
+            if params is not None:
+                window_size, gamma, alpha = params
+            else:
+                config = Imputation.load_toml()
+                window_size = config['stmvl']['default_window_size']
+                gamma = config['stmvl']['default_gamma']
+                alpha = config['stmvl']['default_alpha']
+
+            print("\n\nST-MVL Imputation lanched...\n")
+
+            imputed_matrix = stmvl(contamination=contamination, window_size=window_size, gamma=gamma, alpha=alpha)
+
+            metrics = Evaluation(ground_truth, imputed_matrix, contamination).metrics_computation()
+
+            print("\nST-MVL Imputation completed without error.\n")
+
             return imputed_matrix, metrics