From 5e5847be3a7b166538aab9db52a60990911aca36 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:59:28 -0500 Subject: [PATCH 01/13] Add numpy array support for initialize parameter for FPS --- src/skmatter/_selection.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 95e43ed15..de6aa851a 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -934,7 +934,7 @@ class _FPS(GreedySelector): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. @@ -1053,6 +1053,12 @@ def _init_greedy_search(self, X, y, n_to_select): for i, val in enumerate(self.initialize): self.selected_idx_[i] = val self._update_post_selection(X, y, self.selected_idx_[i]) + elif isinstance(self.initialize, np.ndarray) and all( + [isinstance(i, numbers.Integral) for i in self.initialize] + ): + for i, val in enumerate(self.initialize): + self.selected_idx_[i] = val + self._update_post_selection(X, y, self.selected_idx_[i]) else: raise ValueError("Invalid value of the initialize parameter") From 1e1658a495bbaadf098e5a16f87d45c9002062b7 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:07:33 -0500 Subject: [PATCH 02/13] Adding unit test for initialize as np array --- src/skmatter/feature_selection/_base.py | 2 +- tests/test_feature_simple_fps.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/skmatter/feature_selection/_base.py b/src/skmatter/feature_selection/_base.py index 4971f853d..74fb8257b 100644 --- a/src/skmatter/feature_selection/_base.py +++ b/src/skmatter/feature_selection/_base.py @@ -12,7 +12,7 @@ class 
FPS(_FPS): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index b29a2bc7b..9052b5e48 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -1,5 +1,7 @@ import unittest +import numpy as np + from sklearn.datasets import load_diabetes as get_dataset from sklearn.utils.validation import NotFittedError @@ -42,6 +44,13 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array(self.idx[:4]) + with self.subTest(initialize=initialize): + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + for i in range(4): + self.assertEqual(selector.selected_idx_[i], self.idx[i]) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) From 34229f9745cf8d9805d2d9e683287d07f71b5e0f Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:11:38 -0500 Subject: [PATCH 03/13] Fixed linting issue --- tests/test_feature_simple_fps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index 9052b5e48..fc57da377 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -1,7 +1,6 @@ import unittest import numpy as np - from sklearn.datasets import load_diabetes as get_dataset from sklearn.utils.validation import NotFittedError From 9b0b77d989d0d894125130d7f556331d259c81c8 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:28:29 -0500 Subject: [PATCH 04/13] Added fix for np 
array value error --- src/skmatter/_selection.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index de6aa851a..b9602c9fa 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -1038,7 +1038,14 @@ def _init_greedy_search(self, X, y, n_to_select): self.hausdorff_ = np.full(X.shape[self._axis], np.inf) self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) - if self.initialize == "random": + if isinstance(self.initialize, np.ndarray): + if all(isinstance(i, numbers.Integral) for i in self.initialize): + for i, val in enumerate(self.initialize): + self.selected_idx_[i] = val + self._update_post_selection(X, y, self.selected_idx_[i]) + else: + raise ValueError("Initialize parameter must contain only int") + elif self.initialize == "random": random_state = check_random_state(self.random_state) initialize = random_state.randint(X.shape[self._axis]) self.selected_idx_[0] = initialize @@ -1053,12 +1060,7 @@ def _init_greedy_search(self, X, y, n_to_select): for i, val in enumerate(self.initialize): self.selected_idx_[i] = val self._update_post_selection(X, y, self.selected_idx_[i]) - elif isinstance(self.initialize, np.ndarray) and all( - [isinstance(i, numbers.Integral) for i in self.initialize] - ): - for i, val in enumerate(self.initialize): - self.selected_idx_[i] = val - self._update_post_selection(X, y, self.selected_idx_[i]) + else: raise ValueError("Invalid value of the initialize parameter") From ce536cdff34cc54c5eca339a8e1ae6b8aa83a6b0 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:51:20 -0500 Subject: [PATCH 05/13] Adding unit test for case with np array containing non-ints --- tests/test_feature_simple_fps.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index fc57da377..b8961da27 100644 
--- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -50,6 +50,15 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array([1, 5, 3, 0.25]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Initialize parameter must contain only int" + ) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) From c1a1b9ed7a90e45507e2bb0d9193bde301db01be Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:56:48 -0500 Subject: [PATCH 06/13] Adding documentation in skmatter.sample_selection --- src/skmatter/sample_selection/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index 6026bce7b..22c3ac496 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -58,7 +58,7 @@ class FPS(_FPS): Parameters ---------- - initialize: int, list of int, or 'random', default=0 + initialize: int, list of int, ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. 
From c25c850b4a5f124922788c22bc8fddf427d42877 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 2 May 2024 14:47:23 -0500 Subject: [PATCH 07/13] Removed unnecessary test and fixed initialize --- src/skmatter/_selection.py | 17 ++++++++--------- tests/test_feature_simple_fps.py | 9 --------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index b9602c9fa..9221af3bb 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -934,7 +934,7 @@ class _FPS(GreedySelector): Parameters ---------- - initialize: int, list of int, ndarray of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. @@ -1038,14 +1038,7 @@ def _init_greedy_search(self, X, y, n_to_select): self.hausdorff_ = np.full(X.shape[self._axis], np.inf) self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) - if isinstance(self.initialize, np.ndarray): - if all(isinstance(i, numbers.Integral) for i in self.initialize): - for i, val in enumerate(self.initialize): - self.selected_idx_[i] = val - self._update_post_selection(X, y, self.selected_idx_[i]) - else: - raise ValueError("Initialize parameter must contain only int") - elif self.initialize == "random": + if self.initialize == "random": random_state = check_random_state(self.random_state) initialize = random_state.randint(X.shape[self._axis]) self.selected_idx_[0] = initialize @@ -1060,6 +1053,12 @@ def _init_greedy_search(self, X, y, n_to_select): for i, val in enumerate(self.initialize): self.selected_idx_[i] = val self._update_post_selection(X, y, self.selected_idx_[i]) + elif isinstance(self.initialize, np.ndarray) and all( + isinstance(i, numbers.Integral) for i in self.initialize + ): + for i, val in enumerate(self.initialize): + self.selected_idx_[i] = val + 
self._update_post_selection(X, y, self.selected_idx_[i]) else: raise ValueError("Invalid value of the initialize parameter") diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index b8961da27..fc57da377 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -50,15 +50,6 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) - initialize = np.array([1, 5, 3, 0.25]) - with self.subTest(initialize=initialize): - with self.assertRaises(ValueError) as cm: - selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) - selector.fit(self.X) - self.assertEqual( - str(cm.exception), "Initialize parameter must contain only int" - ) - with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) From c4ee83020be9aa47f5617d7c9739789f67cb10fd Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 2 May 2024 15:04:54 -0500 Subject: [PATCH 08/13] Revert "Removed unnecessary test and fixed initialize" This reverts commit c25c850b4a5f124922788c22bc8fddf427d42877. --- src/skmatter/_selection.py | 17 +++++++++-------- tests/test_feature_simple_fps.py | 9 +++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 9221af3bb..b9602c9fa 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -934,7 +934,7 @@ class _FPS(GreedySelector): Parameters ---------- - initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 + initialize: int, list of int, ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. 
@@ -1038,7 +1038,14 @@ def _init_greedy_search(self, X, y, n_to_select): self.hausdorff_ = np.full(X.shape[self._axis], np.inf) self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) - if self.initialize == "random": + if isinstance(self.initialize, np.ndarray): + if all(isinstance(i, numbers.Integral) for i in self.initialize): + for i, val in enumerate(self.initialize): + self.selected_idx_[i] = val + self._update_post_selection(X, y, self.selected_idx_[i]) + else: + raise ValueError("Initialize parameter must contain only int") + elif self.initialize == "random": random_state = check_random_state(self.random_state) initialize = random_state.randint(X.shape[self._axis]) self.selected_idx_[0] = initialize @@ -1053,12 +1060,6 @@ def _init_greedy_search(self, X, y, n_to_select): for i, val in enumerate(self.initialize): self.selected_idx_[i] = val self._update_post_selection(X, y, self.selected_idx_[i]) - elif isinstance(self.initialize, np.ndarray) and all( - isinstance(i, numbers.Integral) for i in self.initialize - ): - for i, val in enumerate(self.initialize): - self.selected_idx_[i] = val - self._update_post_selection(X, y, self.selected_idx_[i]) else: raise ValueError("Invalid value of the initialize parameter") diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index fc57da377..b8961da27 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -50,6 +50,15 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array([1, 5, 3, 0.25]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Initialize parameter must contain only int" + ) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) From 
fe78f8feb1971134aefdb6cbcc7246ceef95b796 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 2 May 2024 15:09:19 -0500 Subject: [PATCH 09/13] Adding "numpy" before ndarray in docstrings --- src/skmatter/_selection.py | 2 +- src/skmatter/feature_selection/_base.py | 2 +- src/skmatter/sample_selection/_base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index b9602c9fa..e9632fb16 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -934,7 +934,7 @@ class _FPS(GreedySelector): Parameters ---------- - initialize: int, list of int, ndarray of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. diff --git a/src/skmatter/feature_selection/_base.py b/src/skmatter/feature_selection/_base.py index 74fb8257b..e6702e126 100644 --- a/src/skmatter/feature_selection/_base.py +++ b/src/skmatter/feature_selection/_base.py @@ -12,7 +12,7 @@ class FPS(_FPS): Parameters ---------- - initialize: int, list of int, ndarray of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index 22c3ac496..ab2c539d6 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -58,7 +58,7 @@ class FPS(_FPS): Parameters ---------- - initialize: int, list of int, ndarray of int, or 'random', default=0 + initialize: int, list of int, numpy.ndarray of int, or 'random', default=0 Index of the first selection(s). If 'random', picks a random value when fit starts. Stored in :py:attr:`self.initialize`. 
From 624b9bb09591e68236c39179f2c1515b0455d21a Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Fri, 3 May 2024 12:08:30 -0500 Subject: [PATCH 10/13] Changing error message and adding another unit test --- src/skmatter/_selection.py | 2 +- tests/test_feature_simple_fps.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index e9632fb16..a6ca46b4c 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -1044,7 +1044,7 @@ def _init_greedy_search(self, X, y, n_to_select): self.selected_idx_[i] = val self._update_post_selection(X, y, self.selected_idx_[i]) else: - raise ValueError("Initialize parameter must contain only int") + raise ValueError("Invalid value of the initialize parameter") elif self.initialize == "random": random_state = check_random_state(self.random_state) initialize = random_state.randint(X.shape[self._axis]) diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index b8961da27..f2b42d021 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -56,7 +56,16 @@ def test_initialize(self): selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) selector.fit(self.X) self.assertEqual( - str(cm.exception), "Initialize parameter must contain only int" + str(cm.exception), "Invalid value of the initialize parameter" + ) + + initialize = np.array([[1, 5, 3], [2, 4, 6]]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" ) with self.assertRaises(ValueError) as cm: From a38393618d4a54d5c9dd15a97d677e3d76187511 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Fri, 3 May 2024 12:55:06 -0500 Subject: [PATCH 11/13] Added unit tests --- 
tests/test_sample_simple_fps.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_sample_simple_fps.py b/tests/test_sample_simple_fps.py index ca7ee4bee..e250eaaed 100644 --- a/tests/test_sample_simple_fps.py +++ b/tests/test_sample_simple_fps.py @@ -1,5 +1,6 @@ import unittest +import numpy as np from sklearn.datasets import load_diabetes as get_dataset from sklearn.utils.validation import NotFittedError @@ -43,6 +44,31 @@ def test_initialize(self): for i in range(4): self.assertEqual(selector.selected_idx_[i], self.idx[i]) + initialize = np.array(self.idx[:4]) + with self.subTest(initialize=initialize): + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + for i in range(4): + self.assertEqual(selector.selected_idx_[i], self.idx[i]) + + initialize = np.array([1, 5, 3, 0.25]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + + initialize = np.array([[1, 5, 3], [2, 4, 6]]) + with self.subTest(initialize=initialize): + with self.assertRaises(ValueError) as cm: + selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize) + selector.fit(self.X) + self.assertEqual( + str(cm.exception), "Invalid value of the initialize parameter" + ) + with self.assertRaises(ValueError) as cm: selector = FPS(n_to_select=1, initialize="bad") selector.fit(self.X) From c5480cc6733e75c5af78523ebe3488a854b317fe Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Tue, 7 May 2024 10:35:48 -0500 Subject: [PATCH 12/13] Combined if statements for list and array --- src/skmatter/_selection.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index a6ca46b4c..e5cb76601 100644 --- 
a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -1038,7 +1038,7 @@ def _init_greedy_search(self, X, y, n_to_select): self.hausdorff_ = np.full(X.shape[self._axis], np.inf) self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) - if isinstance(self.initialize, np.ndarray): + if isinstance(self.initialize, (np.ndarray, list)): if all(isinstance(i, numbers.Integral) for i in self.initialize): for i, val in enumerate(self.initialize): self.selected_idx_[i] = val @@ -1054,13 +1054,6 @@ def _init_greedy_search(self, X, y, n_to_select): initialize = self.initialize self.selected_idx_[0] = initialize self._update_post_selection(X, y, self.selected_idx_[0]) - elif isinstance(self.initialize, list) and all( - [isinstance(i, numbers.Integral) for i in self.initialize] - ): - for i, val in enumerate(self.initialize): - self.selected_idx_[i] = val - self._update_post_selection(X, y, self.selected_idx_[i]) - else: raise ValueError("Invalid value of the initialize parameter") From 9cf5bb05c67f7d312c68104e32891d886aaadc40 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen <114787994+cajchristian@users.noreply.github.com> Date: Mon, 13 May 2024 10:42:58 -0500 Subject: [PATCH 13/13] Update CHANGELOG --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 93fc80c0b..8117c69a7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ The rules for CHANGELOG file: 0.3.0 (XXXX/XX/XX) ------------------ +- Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145) - Supported Python versions are now ranging from 3.9 - 3.12. 0.2.0 (2023/08/24)