Skip to content

Commit

Permalink
mrnn - iim - cdrec - stmvl - linux env
Browse files Browse the repository at this point in the history
  • Loading branch information
qnater committed Sep 24, 2024
1 parent 7b64e2c commit efb2c1c
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 47 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/pytest_imp_stmvl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# CI workflow: runs the ST-MVL imputation tests on every push and pull request.
name: Pytest - ImputeGAP - Imputation - ST-MVL - 8

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'

      - name: Install dependencies
        run: |
          sudo apt-get update
          # -y keeps the install non-interactive even on runners that do not
          # pre-configure apt to assume "yes".
          sudo apt-get install -y libmlpack-dev
          sudo apt-get install -y libopenblas-dev
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: python -m pytest ./tests/test_imputation_stmvl.py
29 changes: 20 additions & 9 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc
Binary file not shown.
Binary file not shown.
50 changes: 27 additions & 23 deletions imputegap/algorithms/cdrec.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,56 @@
import numpy as np
import ctypes
import os
import platform
import os.path as __os_path_import;
import ctypes as __native_c_types_import;
import numpy as __numpy_import;


def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
    """
    Turn a flat native buffer back into a 2D numpy array.
    The buffer is read as __py_sizem consecutive runs of __py_sizen values,
    and each run becomes one column of the result.
    :param __ctype_container: flat container holding __py_sizen * __py_sizem values
    :param __py_sizen: number of rows of the returned matrix
    :param __py_sizem: number of columns of the returned matrix
    :return: 2D numpy array of shape (__py_sizen, __py_sizem)
    """
    __flat_values = __numpy_import.array(__ctype_container)
    __stacked_columns = __flat_values.reshape(__py_sizem, __py_sizen)

    return __stacked_columns.T


def __marshal_as_native_column(__py_matrix):
    """
    Flatten a 2D matrix column by column into a ctypes array of doubles.
    The values are coerced to float64 first, so an integer or float32 matrix
    is not handed to native code (which reads the buffer as double) with the
    wrong element type.
    :param __py_matrix: 2D array-like of numeric values
    :return: ctypes array of c_double holding the columns of the matrix one after another
    """
    __py_as_double = __numpy_import.asarray(__py_matrix, dtype=__numpy_import.float64)

    # flatten() always returns a fresh contiguous copy, so the ctypes view
    # created below never aliases the caller's matrix.
    __py_input_flat = __py_as_double.T.flatten()
    __ctype_marshal = __numpy_import.ctypeslib.as_ctypes(__py_input_flat)

    return __ctype_marshal

def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):

def load_share_lib(name = "lib_algo"):
"""
Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
:param __py_matrix: 2D array
:param __py_rank: truncation rank to be used (0 = detect truncation automatically)
:param __py_eps: threshold for difference during recovery
:param __py_iters: maximum number of allowed iterations for the algorithms
:return: 2D array recovered matrix
Determine the OS and load the correct shared library
:param name: name of the library
:return: the correct path to the library
"""

# Determine the OS and load the correct shared library

local_path_win = './algorithms/lib/lib_algo.dll'
local_path_lin = './algorithms/lib/lib_algo.so'
local_path_win = './algorithms/lib/'+name+'.dll'
local_path_lin = './algorithms/lib/'+name+'.so'

if not os.path.exists(local_path_win):
local_path_win = './imputegap/algorithms/lib/lib_algo.dll'
local_path_lin = './imputegap/algorithms/lib/lib_algo.so'
local_path_win = './imputegap/algorithms/lib/'+name+'.dll'
local_path_lin = './imputegap/algorithms/lib/'+name+'.so'

if platform.system() == 'Windows':
lib_path = os.path.join(local_path_win)
else:
lib_path = os.path.join(local_path_lin)
print("\n", lib_path, " has been loaded...")

return ctypes.CDLL(lib_path)

print("\n", lib_path , " has been loaded...")

cdrec_lib = ctypes.CDLL(lib_path)

def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
"""
Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
:param __py_matrix: 2D array
:param __py_rank: truncation rank to be used (0 = detect truncation automatically)
:param __py_eps: threshold for difference during recovery
:param __py_iters: maximum number of allowed iterations for the algorithms
:return: 2D array recovered matrix
"""

shared_lib = load_share_lib()

__py_sizen = len(__py_matrix);
__py_sizem = len(__py_matrix[0]);
Expand All @@ -70,7 +75,7 @@ def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):
# double *matrixNative, size_t dimN, size_t dimM,
# size_t truncation, double epsilon, size_t iters
# )
cdrec_lib.cdrec_imputation_parametrized(
shared_lib.cdrec_imputation_parametrized(
__ctype_input_matrix, __ctype_sizen, __ctype_sizem,
__ctype_rank, __ctype_eps, __ctype_iters
);
Expand All @@ -79,12 +84,11 @@ def native_cdrec_param(__py_matrix, __py_rank, __py_eps, __py_iters):

return __py_recovered;

def cdrec(ground_truth, contamination, truncation_rank, iterations, epsilon):
def cdrec(contamination, truncation_rank, iterations, epsilon):
"""
CDREC algorithm for imputation of missing data
@author : Quentin Nater
:param ground_truth: original time series without contamination
:param contamination: time series with contamination
:param truncation_rank: rank of reduction of the matrix (must be higher than 1 and smaller than the limit of series)
:param epsilon : learning rate
Expand All @@ -95,7 +99,7 @@ def cdrec(ground_truth, contamination, truncation_rank, iterations, epsilon):
"""

# Call the C++ function to perform recovery
imputed_matrix = native_cdrec_param(contamination, truncation_rank, epsilon, iterations)
imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations)

return imputed_matrix

Expand Down
109 changes: 109 additions & 0 deletions imputegap/algorithms/stmvl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import ctypes
import os
import platform
import ctypes as __native_c_types_import;
import numpy as __numpy_import;

def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
    """
    Turn a flat native buffer back into a 2D numpy array.
    The buffer is read as __py_sizem consecutive runs of __py_sizen values,
    and each run becomes one column of the result.
    :param __ctype_container: flat container holding __py_sizen * __py_sizem values
    :param __py_sizen: number of rows of the returned matrix
    :param __py_sizem: number of columns of the returned matrix
    :return: 2D numpy array of shape (__py_sizen, __py_sizem)
    """
    __flat_values = __numpy_import.array(__ctype_container)
    __stacked_columns = __flat_values.reshape(__py_sizem, __py_sizen)

    return __stacked_columns.T

def __marshal_as_native_column(__py_matrix):
    """
    Flatten a 2D matrix column by column into a ctypes array of doubles.
    The values are coerced to float64 first, so an integer or float32 matrix
    is not handed to native code (which reads the buffer as double) with the
    wrong element type.
    :param __py_matrix: 2D array-like of numeric values
    :return: ctypes array of c_double holding the columns of the matrix one after another
    """
    __py_as_double = __numpy_import.asarray(__py_matrix, dtype=__numpy_import.float64)

    # flatten() always returns a fresh contiguous copy, so the ctypes view
    # created below never aliases the caller's matrix.
    __py_input_flat = __py_as_double.T.flatten()
    __ctype_marshal = __numpy_import.ctypeslib.as_ctypes(__py_input_flat)

    return __ctype_marshal


def load_share_lib(name="lib_algo"):
    """
    Determine the OS and load the matching shared library.
    The library is searched relative to the current working directory:
    first in ./algorithms/lib/, then in ./imputegap/algorithms/lib/.
    The existence check uses the file for the current platform
    (.dll on Windows, .so otherwise), so a Linux checkout that ships only
    the .so file is still resolved from the first directory.
    :param name: base name of the library file, without extension
    :return: the loaded library handle (ctypes.CDLL)
    :raises OSError: if the resolved file cannot be loaded by ctypes
    """

    extension = '.dll' if platform.system() == 'Windows' else '.so'

    lib_path = './algorithms/lib/' + name + extension

    if not os.path.exists(lib_path):
        # Fall back to the layout seen when running from the repository root.
        lib_path = './imputegap/algorithms/lib/' + name + extension

    print("\n", lib_path, " has been loaded...")

    return ctypes.CDLL(lib_path)




def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha):
    # type: (__numpy_import.array, int, float, float) -> __numpy_import.array
    """
    Recovers missing values (designated as NaN) in a matrix. Supports additional parameters
    :param __py_matrix: 2D array
    :param __py_window: window size for temporal component (must be >= 2)
    :param __py_gamma: smoothing parameter for temporal weight (must be in ]0, 1[)
    :param __py_alpha: power for spatial weight (must be > 0)
    :return: 2D array recovered matrix
    """

    shared_lib = load_share_lib()

    __py_sizen = len(__py_matrix)
    __py_sizem = len(__py_matrix[0])

    # NOTE: these checks are skipped when Python runs with -O.
    assert (__py_window >= 2)
    assert (__py_gamma > 0.0)
    assert (__py_gamma < 1.0)
    assert (__py_alpha > 0.0)

    # The native signature declares these three arguments as size_t, so they
    # are marshalled as c_size_t to match its width on every platform.
    __ctype_sizen = __native_c_types_import.c_size_t(__py_sizen)
    __ctype_sizem = __native_c_types_import.c_size_t(__py_sizem)

    __ctype_window = __native_c_types_import.c_size_t(__py_window)
    __ctype_gamma = __native_c_types_import.c_double(__py_gamma)
    __ctype_alpha = __native_c_types_import.c_double(__py_alpha)

    # Native code uses linear matrix layout, and also it's easier to pass it in like this
    __ctype_input_matrix = __marshal_as_native_column(__py_matrix)

    # extern "C" void
    # stmvl_imputation_parametrized(
    #         double *matrixNative, size_t dimN, size_t dimM,
    #         size_t window_size, double gamma, double alpha
    # )
    shared_lib.stmvl_imputation_parametrized(
        __ctype_input_matrix, __ctype_sizen, __ctype_sizem,
        __ctype_window, __ctype_gamma, __ctype_alpha
    )

    # The same buffer that was passed to the native call is read back here.
    __py_recovered = __marshal_as_numpy_column(__ctype_input_matrix, __py_sizen, __py_sizem)

    return __py_recovered


def stmvl(contamination, window_size, gamma, alpha):
    """
    ST-MVL algorithm for imputation of missing data
    @author : Quentin Nater
    :param contamination: time series with contamination
    :param window_size: window size for temporal component
    :param gamma: smoothing parameter for temporal weight
    :param alpha: power for spatial weight
    :return: imputed_matrix : the matrix returned by the native recovery
    """

    # Hand the recovery over to the C++ implementation and return its result as is
    return native_stmvl(contamination, window_size, gamma, alpha)


Binary file modified imputegap/assets/test_contamination.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified imputegap/assets/test_ground_truth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified imputegap/assets/test_imputation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified imputegap/imputation/__pycache__/imputation.cpython-312.pyc
Binary file not shown.
35 changes: 34 additions & 1 deletion imputegap/imputation/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from imputegap.algorithms.iim import iim
from imputegap.algorithms.min_impute import min_impute
from imputegap.algorithms.mrnn import mrnn
from imputegap.algorithms.stmvl import stmvl
from imputegap.algorithms.zero_impute import zero_impute
from imputegap.evaluation.evaluation import Evaluation

Expand Down Expand Up @@ -134,7 +135,7 @@ def mrnn_imputation(ground_truth, contamination, params=None):
:return: imputed_matrix, metrics : all time series with imputation data and their metrics
"""
if params is not None:
hidden_dim, learning_rate, iterations, keep_prob, sequence_length = params
hidden_dim, learning_rate, iterations, sequence_length = params
else:
config = Imputation.load_toml()
hidden_dim = config['mrnn']['default_hidden_dim']
Expand All @@ -150,4 +151,36 @@ def mrnn_imputation(ground_truth, contamination, params=None):

print("\nMRNN Imputation completed without error.\n")

return imputed_matrix, metrics

class Pattern:
    def stmvl_imputation(ground_truth, contamination, params=None):
        """
        Imputation of data with ST-MVL algorithm
        @author Quentin Nater
        :param ground_truth: original time series without contamination
        :param contamination: time series with contamination
        :param params: [Optional] (window_size, gamma, alpha); if None, the defaults of the 'stmvl' section of the loaded configuration are used
        :param window_size: window size for temporal component
        :param gamma: smoothing parameter for temporal weight
        :param alpha: power for spatial weight
        :return: imputed_matrix, metrics : all time series with imputation data and their metrics
        """
        if params is None:
            # No explicit parameters: read the defaults from the configuration
            stmvl_defaults = Imputation.load_toml()['stmvl']
            window_size = stmvl_defaults['default_window_size']
            gamma = stmvl_defaults['default_gamma']
            alpha = stmvl_defaults['default_alpha']
        else:
            window_size, gamma, alpha = params

        print("\n\nST-MVL Imputation lanched...\n")

        imputed_matrix = stmvl(
            contamination=contamination,
            window_size=window_size,
            gamma=gamma,
            alpha=alpha,
        )

        # Metrics are computed from the ground truth, the imputed result and the contaminated input
        evaluation = Evaluation(ground_truth, imputed_matrix, contamination)
        metrics = evaluation.metrics_computation()

        print("\nST-MVL Imputation completed without error.\n")

        return imputed_matrix, metrics
Loading

0 comments on commit efb2c1c

Please sign in to comment.