From dd31493e9063aecf339ff60dc534e9aa4d3cf667 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen <114787994+cajchristian@users.noreply.github.com> Date: Thu, 16 May 2024 13:15:52 -0500 Subject: [PATCH] Allowing FPS to take numpy array of ints as initialize parameter (#225) * Add numpy array support for initialize parameter for FPS * Adding unit test for initialize as np array * Fixed linting issue * Added fix for np array value error * Adding unit test for case with np array containing non-ints * Adding documentation in skmatter.sample_selection * Removed unnecessary test and fixed initialize * Revert "Removed unnecessary test and fixed initialize" This reverts commit c25c850b4a5f124922788c22bc8fddf427d42877. * Adding "numpy" before ndarray in docstrings * Changing error message and adding another unit test * Added unit tests * Combined if statements for list and array * Update CHANGELOG --------- Co-authored-by: Christian Jorgensen --- CHANGELOG | 1 + src/skmatter/_selection.py | 17 ++++++++-------- src/skmatter/feature_selection/_base.py | 2 +- src/skmatter/sample_selection/_base.py | 2 +- tests/test_feature_simple_fps.py | 26 +++++++++++++++++++++++++ tests/test_sample_simple_fps.py | 26 +++++++++++++++++++++++++ 6 files changed, 64 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 93fc80c0b..8117c69a7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ The rules for CHANGELOG file: 0.3.0 (XXXX/XX/XX) ------------------ +- Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145) - Supported Python versions are now ranging from 3.9 - 3.12. 
0.2.0 (2023/08/24) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 95e43ed15..e5cb76601 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -934,7 +934,7 @@ class _FPS(GreedySelector): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. @@ -1038,7 +1038,14 @@ def _init_greedy_search(self, X, y, n_to_select): self.hausdorff_ = np.full(X.shape[self._axis], np.inf) self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) - if self.initialize == "random": + if isinstance(self.initialize, (np.ndarray, list)): + if all(isinstance(i, numbers.Integral) for i in self.initialize): + for i, val in enumerate(self.initialize): + self.selected_idx_[i] = val + self._update_post_selection(X, y, self.selected_idx_[i]) + else: + raise ValueError("Invalid value of the initialize parameter") + elif self.initialize == "random": random_state = check_random_state(self.random_state) initialize = random_state.randint(X.shape[self._axis]) self.selected_idx_[0] = initialize @@ -1047,12 +1054,6 @@ def _init_greedy_search(self, X, y, n_to_select): initialize = self.initialize self.selected_idx_[0] = initialize self._update_post_selection(X, y, self.selected_idx_[0]) - elif isinstance(self.initialize, list) and all( - [isinstance(i, numbers.Integral) for i in self.initialize] - ): - for i, val in enumerate(self.initialize): - self.selected_idx_[i] = val - self._update_post_selection(X, y, self.selected_idx_[i]) else: raise ValueError("Invalid value of the initialize parameter") diff --git a/src/skmatter/feature_selection/_base.py b/src/skmatter/feature_selection/_base.py index 4971f853d..e6702e126 100644 --- a/src/skmatter/feature_selection/_base.py +++ b/src/skmatter/feature_selection/_base.py @@ -12,7 +12,7 @@ 
class FPS(_FPS): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index 6026bce7b..ab2c539d6 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -58,7 +58,7 @@ class FPS(_FPS): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index b29a2bc7b..f2b42d021 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -1,5 +1,6 @@ import unittest +import numpy as np from sklearn.datasets import load_diabetes as get_dataset from sklearn.utils.validation import NotFittedError @@ -42,6 +43,31 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array(self.idx[:4]) + with self.subTest(initialize=initialize): + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + for i in range(4): + self.assertEqual(selector.selected_idx_[i], self.idx[i]) + + initialize = np.array([1, 5, 3, 0.25]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + + initialize = np.array([[1, 5, 3], [2, 4, 6]]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) 
as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) diff --git a/tests/test_sample_simple_fps.py b/tests/test_sample_simple_fps.py index 5ca9fb4b5..727d9ec11 100644 --- a/tests/test_sample_simple_fps.py +++ b/tests/test_sample_simple_fps.py @@ -1,5 +1,6 @@ import unittest +import numpy as np from sklearn.datasets import load_diabetes as get_dataset from sklearn.utils.validation import NotFittedError @@ -43,6 +44,31 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array(self.idx[:4]) + with self.subTest(initialize=initialize): + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + for i in range(4): + self.assertEqual(selector.selected_idx_[i], self.idx[i]) + + initialize = np.array([1, 5, 3, 0.25]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + + initialize = np.array([[1, 5, 3], [2, 4, 6]]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X)