Skip to content

Commit

Permalink
Merge pull request #734 from mattsmart/master
Browse files (browse the repository at this point in the history)
Reproducibility for Aligned UMAP
  • Loading branch information
lmcinnes authored Jul 26, 2021
2 parents 787c282 + 8ae0070 commit c10a82d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
26 changes: 16 additions & 10 deletions umap/aligned_umap.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -352,11 +352,13 @@ def fit(self, X, y=None, **fit_params):
make_epochs_per_sample(mapper.graph_.tocoo().data, n_epochs)
)

rng_state_transform = np.random.RandomState(self.transform_seed)
regularisation_weights = build_neighborhood_similarities(
indptr_list, indices_list, relations,
)
first_init = spectral_layout(
self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components, np.random,
self.mappers_[0]._raw_data, self.mappers_[0].graph_, self.n_components,
rng_state_transform,
)
expansion = 10.0 / np.abs(first_init).max()
first_embedding = (first_init * expansion).astype(np.float32, order="C",)
Expand All @@ -365,7 +367,8 @@ def fit(self, X, y=None, **fit_params):
embeddings.append(first_embedding)
for i in range(1, self.n_models_):
next_init = spectral_layout(
self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components, np.random,
self.mappers_[i]._raw_data, self.mappers_[i].graph_, self.n_components,
rng_state_transform,
)
expansion = 10.0 / np.abs(next_init).max()
next_embedding = (next_init * expansion).astype(np.float32, order="C",)
Expand All @@ -380,9 +383,9 @@ def fit(self, X, y=None, **fit_params):
)
)

random_state = check_random_state(self.random_state)
rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)

seed_triplet = rng_state_transform.randint(
INT32_MIN, INT32_MAX, 3
).astype(np.int64)
self.embeddings_ = optimize_layout_aligned_euclidean(
embeddings,
embeddings,
Expand All @@ -392,7 +395,7 @@ def fit(self, X, y=None, **fit_params):
epochs_per_samples,
regularisation_weights,
relations,
rng_state,
seed_triplet,
lambda_=self.alignment_regularisation,
)

Expand Down Expand Up @@ -436,6 +439,8 @@ def update(self, X, y=None, **fit_params):
set_op_mix_ratio=get_nth_item_or_val(self.set_op_mix_ratio, self.n_models_),
unique=get_nth_item_or_val(self.unique, self.n_models_),
n_components=self.n_components,
random_state=self.random_state,
transform_seed=self.transform_seed,
).fit(X)

self.mappers_ += [new_mapper]
Expand Down Expand Up @@ -477,11 +482,12 @@ def update(self, X, y=None, **fit_params):
self.embeddings_[-1], new_mapper.graph_, new_dict_relations
)

random_state = check_random_state(self.random_state)
rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)

self.embeddings_.append(new_embedding)

rng_state_transform = np.random.RandomState(self.transform_seed)
seed_triplet = rng_state_transform.randint(
INT32_MIN, INT32_MAX, 3
).astype(np.int64)
self.embeddings_ = optimize_layout_aligned_euclidean(
self.embeddings_,
self.embeddings_,
Expand All @@ -491,6 +497,6 @@ def update(self, X, y=None, **fit_params):
epochs_per_samples,
new_regularisation_weights,
new_relations,
rng_state,
seed_triplet,
lambda_=self.alignment_regularisation,
)
1 change: 1 addition & 0 deletions umap/layouts.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -722,6 +722,7 @@ def _optimize_layout_aligned_euclidean_single_epoch(
max_n_edges = e_p_s.shape[0]

embedding_order = np.arange(n_embeddings).astype(np.int32)
np.random.seed(abs(rng_state[0]))
np.random.shuffle(embedding_order)

for i in range(max_n_edges):
Expand Down

0 comments on commit c10a82d

Please sign in to comment.