This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Fix for parallel multivariate initialisation #45

Merged · 4 commits · Mar 16, 2023
2 changes: 1 addition & 1 deletion convst/__init__.py
@@ -1,6 +1,6 @@

 __author__ = 'Antoine Guillaume antoine.guillaume45@gmail.com'
-__version__ = "0.2.6"
+__version__ = "0.2.7"
 
 __all__ = ['transformers', 'classifiers', 'utils', 'interpreters']

51 changes: 0 additions & 51 deletions convst/transformers/_input_transformers.py
@@ -12,8 +12,6 @@

 from numba import njit, prange
 
-from pyts.approximation import DiscreteFourierTransform, SymbolicAggregateApproximation
-
 from scipy.signal import periodogram
 from scipy.fft import fht, fhtoffset

@@ -124,55 +122,6 @@ def _get_windows(self):
"cosine","exponential","tukey","taylor"]
)

#TODO : adapt to multivariate/uneven length context
class Sax(BaseEstimator, TransformerMixin):
def __init__(self, n_bins=10, strategy="uniform", random=False):
self.random = random
self.n_bins = n_bins
self.strategy = strategy


def fit(self, X, y=None):
if self.random:
self._random_init(X.shape[1])
self.transformer = SymbolicAggregateApproximation(
n_bins=self.n_bins, strategy=self.strategy, alphabet='ordinal'
)
self.transformer.fit(X[:,0,:])
return self

def transform(self, X):
X = self.transformer.transform(X[:,0,:])
return X[:, np.newaxis, :]

def _random_init(self, n_timestamps):
self.set_params(**{"n_bins":np.random.choice(np.arange(2,min(n_timestamps,26)))})

#TODO : adapt to multivariate/uneven length context
class FourrierCoefs(BaseEstimator, TransformerMixin):
def __init__(
self, n_coefs=None, drop_sum=False, anova=False, norm_mean=False,
norm_std=False
):
self.n_coefs = n_coefs
self.drop_sum = drop_sum
self.anova = anova
self.norm_mean = norm_mean
self.norm_std = norm_std

def fit(self, X, y=None):
self.transformer = DiscreteFourierTransform(
n_coefs=self.n_coefs, drop_sum=self.drop_sum, anova=self.anova,
norm_std=self.norm_std, norm_mean=self.norm_mean,
)
self.transformer.fit(X[:,0,:], y=y)
return self

def transform(self, X):
X = self.transformer.transform(X[:,0,:])
return X[:, np.newaxis, :]


class FastHankelTransform(BaseEstimator, TransformerMixin):
def __init__(
self, dln=0.01, mu=1, offset=0.0, bias=0.0, use_optimal_offset=True
47 changes: 31 additions & 16 deletions convst/transformers/_multivariate_same_length.py
@@ -171,10 +171,10 @@ def M_SL_generate_shapelet(
         (2,unique_dil.shape[0],n_samples,n_features,n_timestamps), dtype=bool_
     )
     mask_return = ones(n_shapelets, dtype=bool_)
-    #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    #values[idx_val[i]:idx_val[i+1]]=_val
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #same for channels
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))
 
     #For each dilation, we can do in parallel
     for i_d in prange(unique_dil.shape[0]):
@@ -251,32 +251,47 @@ def M_SL_generate_shapelet(

                     _values[a3:b3] = _v
                     a3 = b3
-                #Counter for values array indexes
-                b1 = a1 + _n_channels*_length
-                #Counter for channels_ids array indexes
-                b2 = a2 + _n_channels
 
-                values[a1:b1] = _values
-                channel_ids[a2:b2] = _channel_ids
+                values[a1[i_shp]:a1[i_shp+1]] = _values
+                channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids
 
                 #Extract value between two percentile as threshold for SO
                 ps = percentile(x_dist, [p_min,p_max])
                 threshold[i_shp] = uniform(
                     ps[0], ps[1]
                 )
-                a1 = b1
-                a2 = b2
             else:
                 mask_return[i_shp] = False
 
+    lengths = lengths[mask_return]
+    n_channels = n_channels[mask_return]
+    mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+    mask_values = zeros(
+        int64(
+            dot(lengths.astype(float64), n_channels.astype(float64))
+        )
+    )
+
+    c1 = 0
+    c2 = 0
+    for idx, i_shp in enumerate(where(mask_return)[0]):
+        d1 = c1 + (n_channels[idx] * lengths[idx])
+        d2 = c2 + n_channels[idx]
+
+        mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+        mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+
+        c1 = d1
+        c2 = d2
+
     return (
-        values[:a1],
-        lengths[mask_return],
+        mask_values,
+        lengths,
         dilations[mask_return],
         threshold[mask_return],
         normalize[mask_return],
-        n_channels[mask_return],
-        channel_ids[:a2]
+        n_channels,
+        mask_channel_ids
     )

@njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
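The substantive change here: the running counters a1/a2, which were advanced sequentially after each shapelet, are replaced by offset arrays precomputed with cumsum before the prange loop, so every parallel iteration writes to a slice of values/channel_ids that it alone owns. A minimal standalone sketch of the pattern (demo_fill and the toy inputs are illustrative assumptions, not convst code):

# Sketch only: the precomputed-offset pattern used in this PR.
import numpy as np
from numba import njit, prange

@njit(parallel=True)
def demo_fill(lengths, n_channels):
    # a1[i]:a1[i+1] is the slice of `values` owned by shapelet i.
    a1 = np.concatenate(
        (np.zeros(1, dtype=np.int64), np.cumsum(n_channels * lengths))
    )
    values = np.zeros(a1[-1])
    for i in prange(lengths.shape[0]):
        # No shared counter is mutated, so iterations are independent.
        values[a1[i]:a1[i + 1]] = float(i)
    return values

lengths = np.array([3, 5, 2], dtype=np.int64)
n_channels = np.array([2, 1, 4], dtype=np.int64)
print(demo_fill(lengths, n_channels))  # six 0.0s, five 1.0s, eight 2.0s

With the old running counters, each write offset depended on how many shapelets earlier iterations had already emitted, and prange gives no ordering guarantee across iterations; the cumsum form makes every slice a pure function of i_shp.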
47 changes: 32 additions & 15 deletions convst/transformers/_multivariate_variable_length.py
@@ -176,9 +176,10 @@ def M_VL_generate_shapelet(
     )
     mask_return = ones(n_shapelets, dtype=bool_)
     #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #Indexes per shapelets for channel_ids array
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))
 
     for i in prange(n_samples):
         mask_sampling[:,:,i,:,X_len[i]:] = 0

@@ -280,32 +281,47 @@ def M_VL_generate_shapelet(

                     _values[a3:b3] = _v
                     a3 = b3
-                #Counter for values array indexes
-                b1 = a1 + _n_channels*_length
-                #Counter for channels_ids array indexes
-                b2 = a2 + _n_channels
-
 
-                values[a1:b1] = _values
-                channel_ids[a2:b2] = _channel_ids
+                values[a1[i_shp]:a1[i_shp+1]] = _values
+                channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids
 
                 #Extract value between two percentile as threshold for SO
                 ps = percentile(x_dist, [p_min,p_max])
                 threshold[i_shp] = uniform(
                     ps[0], ps[1]
                 )
-                a1 = b1
-                a2 = b2
             else:
                 mask_return[i_shp] = False
 
+    lengths = lengths[mask_return]
+    n_channels = n_channels[mask_return]
+    mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+    mask_values = zeros(
+        int64(
+            dot(lengths.astype(float64), n_channels.astype(float64))
+        )
+    )
+
+    c1 = 0
+    c2 = 0
+    for idx, i_shp in enumerate(where(mask_return)[0]):
+        d1 = c1 + (n_channels[idx] * lengths[idx])
+        d2 = c2 + n_channels[idx]
+
+        mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+        mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+
+        c1 = d1
+        c2 = d2
+
     return (
-        values[:a1],
-        lengths[mask_return],
+        mask_values,
+        lengths,
         dilations[mask_return],
         threshold[mask_return],
         normalize[mask_return],
-        n_channels[mask_return],
-        channel_ids[:a2]
+        n_channels,
+        mask_channel_ids
    )

@njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
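Shapelets rejected inside the loop (mask_return set to False) leave gaps at their precomputed offsets, so both multivariate files now end with a serial compaction pass that copies only the retained slices into tightly packed outputs. A sketch of that step under the same assumed layout (compact and its signature are illustrative, not the convst API):

# Sketch only: the post-loop compaction pass added in this PR.
import numpy as np

def compact(values, a1, lengths, n_channels, mask_return):
    kept_len = lengths[mask_return]
    kept_ch = n_channels[mask_return]
    # Float dot then cast, mirroring the diff: a Numba-friendly way to
    # compute the packed size sum(kept_len * kept_ch) as a scalar.
    out = np.zeros(int(np.dot(kept_len.astype(np.float64),
                              kept_ch.astype(np.float64))))
    c1 = 0
    for idx, i_shp in enumerate(np.where(mask_return)[0]):
        d1 = c1 + kept_ch[idx] * kept_len[idx]
        out[c1:d1] = values[a1[i_shp]:a1[i_shp + 1]]
        c1 = d1
    return out

The pass is sequential, but it touches each retained value only once and keeps the parallel loop free of any cross-iteration bookkeeping.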
15 changes: 7 additions & 8 deletions pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "convst"
 
-version = "0.2.6"
+version = "0.2.7"
 
 description = "The Random Dilation Shapelet Transform algorithm and associated works"
 readme = "README.md"
@@ -36,15 +36,14 @@ requires-python = ">=3.7,<3.11"
 dependencies = [
     "sktime>=0.15",
     "numba>=0.55",
-    "numpy>=1.21.0",
-    "pandas>=1.1.0",
+    "numpy>=1.21.0,<1.25",
+    "pandas>=1.1.0,<1.6.0",
     "joblib>=1.1.1",
-    "scikit-learn>=1.0",
+    "scikit-learn>=0.24.0,<1.3.0",
     "statsmodels>=0.12.1",
-    "scipy>=1.2.0",
-    "pyts>=0.12",
-    "matplotlib>=3.3.2",
-    "seaborn>=0.11.0",
+    "scipy<2.0.0,>=1.2.0",
+    "matplotlib>=3.1",
+    "seaborn>=0.10.0",
     "pytest>=7.0",
     "sphinx >= 4.2.0",
     "sphinx_gallery >= 0.10.1",