Skip to content

Commit

Permalink
fix: handle small cluster dataset reduction plot by switching to PCA and reduce build to only linux
Browse files Browse the repository at this point in the history
  • Loading branch information
danellecline committed Jul 31, 2024
1 parent 09542e1 commit 16ab4de
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:
docker buildx create --name mybuilder --platform linux/amd64,linux/arm64 --use
# Running out of space for this build so commenting out for now
#docker buildx build --push --platform linux/amd64 -t mbari/sdcat:$RELEASE_VERSION-cuda124 --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION-cuda124 -f docker/Dockerfile.cuda .
docker buildx build --push --platform linux/amd64,linux/arm64 -t mbari/sdcat:$RELEASE_VERSION --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION -f docker/Dockerfile .
docker buildx build --push --platform linux/amd64 -t mbari/sdcat:$RELEASE_VERSION --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION -f docker/Dockerfile .
push_readme_to_dockerhub:
runs-on: ubuntu-latest
name: Push README to Docker Hub
Expand Down
29 changes: 19 additions & 10 deletions sdcat/cluster/cluster.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -214,17 +214,26 @@ def _run_hdbscan_assign(
init = 'spectral'

# Reduce the dimensionality of the embeddings using UMAP to 2 dimensions to visualize the clusters
if have_gpu:
xx = cuUMAP(init=init,
n_components=2,
n_neighbors=3,
min_dist=0.1,
metric='euclidean').fit_transform(df.values)
n_neighbors = min(15, df.values.shape[0] - 1)
info(f'Using {n_neighbors} neighbors for dimensional reduction')
if n_neighbors < 2:
warn('Using PCA instead of UMAP')
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xx = pca.fit_transform(df.values)
else:
xx = UMAP(init=init,
n_components=2,
metric='cosine',
low_memory=True).fit_transform(df.values)
if have_gpu:
xx = cuUMAP(init=init,
n_components=2,
n_neighbors=n_neighbors,
min_dist=0.1,
metric='euclidean').fit_transform(df.values)
else:
xx = UMAP(init=init,
n_components=2,
n_neighbors=n_neighbors,
metric='cosine',
low_memory=True).fit_transform(df.values)

df = pd.DataFrame({'x': xx[clustered, 0], 'y': xx[clustered, 1], 'labels': labels[clustered]})
p = sns.jointplot(data=df, x='x', y='y', hue='labels')
Expand Down

0 comments on commit 16ab4de

Please sign in to comment.