From 64378e0697fe9f77446484e4198267a22cc8f0e6 Mon Sep 17 00:00:00 2001 From: mattsmart Date: Thu, 22 Jul 2021 12:12:24 -0400 Subject: [PATCH 1/4] seed the random.shuffle call using the first int in rng_state --- umap/layouts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/umap/layouts.py b/umap/layouts.py index c3d79fc7..b6ac447b 100644 --- a/umap/layouts.py +++ b/umap/layouts.py @@ -722,6 +722,7 @@ def _optimize_layout_aligned_euclidean_single_epoch( max_n_edges = e_p_s.shape[0] embedding_order = np.arange(n_embeddings).astype(np.int32) + np.random.seed(rng_state[0]) np.random.shuffle(embedding_order) for i in range(max_n_edges): From f4fcb2292354c9f530fd385178bd31521fd85e8f Mon Sep 17 00:00:00 2001 From: mattsmart Date: Thu, 22 Jul 2021 12:17:01 -0400 Subject: [PATCH 2/4] propagate reproducible random state set by transform_seed through spectral_layout and alignment --- umap/aligned_umap.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/umap/aligned_umap.py b/umap/aligned_umap.py index 2f8338e1..c0e64f27 100644 --- a/umap/aligned_umap.py +++ b/umap/aligned_umap.py @@ -352,11 +352,12 @@ def fit(self, X, y=None, **fit_params): make_epochs_per_sample(mapper.graph_.tocoo().data, n_epochs) ) + rng_state_transform = np.random.RandomState(self.transform_seed) regularisation_weights = build_neighborhood_similarities( indptr_list, indices_list, relations, ) first_init = spectral_layout( - self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components, np.random, + self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components, rng_state_transform, ) expansion = 10.0 / np.abs(first_init).max() first_embedding = (first_init * expansion).astype(np.float32, order="C",) @@ -365,7 +366,7 @@ def fit(self, X, y=None, **fit_params): embeddings.append(first_embedding) for i in range(1, self.n_models_): next_init = spectral_layout( - self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components, np.random, + 
self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components, rng_state_transform, ) expansion = 10.0 / np.abs(next_init).max() next_embedding = (next_init * expansion).astype(np.float32, order="C",) @@ -380,9 +381,7 @@ def fit(self, X, y=None, **fit_params): ) ) - random_state = check_random_state(self.random_state) - rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) - + seed_triplet = rng_state_transform.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) self.embeddings_ = optimize_layout_aligned_euclidean( embeddings, embeddings, @@ -392,7 +391,7 @@ def fit(self, X, y=None, **fit_params): epochs_per_samples, regularisation_weights, relations, - rng_state, + seed_triplet, lambda_=self.alignment_regularisation, ) @@ -436,6 +435,8 @@ def update(self, X, y=None, **fit_params): set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio, self.n_models_), unique=get_nth_item_or_val(self.unique, self.n_models_), n_components=self.n_components, + random_state=self.random_state, + transform_seed=self.transform_seed, ).fit(X) self.mappers_ += [new_mapper] @@ -477,11 +478,10 @@ def update(self, X, y=None, **fit_params): self.embeddings_[-1], new_mapper.graph_, new_dict_relations ) - random_state = check_random_state(self.random_state) - rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) - self.embeddings_.append(new_embedding) + rng_state_transform = np.random.RandomState(self.transform_seed) + seed_triplet = rng_state_transform.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) self.embeddings_ = optimize_layout_aligned_euclidean( self.embeddings_, self.embeddings_, @@ -491,6 +491,6 @@ def update(self, X, y=None, **fit_params): epochs_per_samples, new_regularisation_weights, new_relations, - rng_state, + seed_triplet, lambda_=self.alignment_regularisation, ) From 7ca20f3a63e3f2fef16622d958eb1c4d20e0b7eb Mon Sep 17 00:00:00 2001 From: mattsmart Date: Thu, 22 Jul 2021 13:14:12 -0400 Subject: [PATCH 3/4] pep8 line 
length fixes --- umap/aligned_umap.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/umap/aligned_umap.py b/umap/aligned_umap.py index c0e64f27..e290ccef 100644 --- a/umap/aligned_umap.py +++ b/umap/aligned_umap.py @@ -357,7 +357,8 @@ def fit(self, X, y=None, **fit_params): indptr_list, indices_list, relations, ) first_init = spectral_layout( - self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components, rng_state_transform, + self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components, + rng_state_transform, ) expansion = 10.0 / np.abs(first_init).max() first_embedding = (first_init * expansion).astype(np.float32, order="C",) @@ -366,7 +367,8 @@ def fit(self, X, y=None, **fit_params): embeddings.append(first_embedding) for i in range(1, self.n_models_): next_init = spectral_layout( - self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components, rng_state_transform, + self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components, + rng_state_transform, ) expansion = 10.0 / np.abs(next_init).max() next_embedding = (next_init * expansion).astype(np.float32, order="C",) @@ -381,7 +383,9 @@ def fit(self, X, y=None, **fit_params): ) ) - seed_triplet = rng_state_transform.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) + seed_triplet = rng_state_transform.randint( + INT32_MIN, INT32_MAX, 3 + ).astype(np.int64) self.embeddings_ = optimize_layout_aligned_euclidean( embeddings, embeddings, @@ -481,7 +485,9 @@ def update(self, X, y=None, **fit_params): self.embeddings_.append(new_embedding) rng_state_transform = np.random.RandomState(self.transform_seed) - seed_triplet = rng_state_transform.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) + seed_triplet = rng_state_transform.randint( + INT32_MIN, INT32_MAX, 3 + ).astype(np.int64) self.embeddings_ = optimize_layout_aligned_euclidean( self.embeddings_, self.embeddings_, From 8ae0070f76e7737b811909142743081229440130 Mon Sep 17 00:00:00 2001 From: 
mattsmart Date: Thu, 22 Jul 2021 14:09:57 -0400 Subject: [PATCH 4/4] enforce seed is between 0 and INT32_MAX --- umap/layouts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umap/layouts.py b/umap/layouts.py index b6ac447b..d7544b82 100644 --- a/umap/layouts.py +++ b/umap/layouts.py @@ -722,7 +722,7 @@ def _optimize_layout_aligned_euclidean_single_epoch( max_n_edges = e_p_s.shape[0] embedding_order = np.arange(n_embeddings).astype(np.int32) - np.random.seed(rng_state[0]) + np.random.seed(abs(rng_state[0])) np.random.shuffle(embedding_order) for i in range(max_n_edges):