diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7b723b7..8790f03 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -85,7 +85,7 @@ jobs: docker buildx create --name mybuilder --platform linux/amd64,linux/arm64 --use # Running out of space for this build so commenting out for now #docker buildx build --push --platform linux/amd64 -t mbari/sdcat:$RELEASE_VERSION-cuda124 --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION-cuda124 -f docker/Dockerfile.cuda . - docker buildx build --push --platform linux/amd64,linux/arm64 -t mbari/sdcat:$RELEASE_VERSION --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION -f docker/Dockerfile . + docker buildx build --push --platform linux/amd64 -t mbari/sdcat:$RELEASE_VERSION --label GIT_VERSION=$RELEASE_VERSION --label IMAGE_URI=mbari/sdcat:$RELEASE_VERSION -f docker/Dockerfile . push_readme_to_dockerhub: runs-on: ubuntu-latest name: Push README to Docker Hub diff --git a/sdcat/cluster/cluster.py b/sdcat/cluster/cluster.py old mode 100644 new mode 100755 index dff672e..b906556 --- a/sdcat/cluster/cluster.py +++ b/sdcat/cluster/cluster.py @@ -214,17 +214,26 @@ def _run_hdbscan_assign( init = 'spectral' # Reduce the dimensionality of the embeddings using UMAP to 2 dimensions to visualize the clusters - if have_gpu: - xx = cuUMAP(init=init, - n_components=2, - n_neighbors=3, - min_dist=0.1, - metric='euclidean').fit_transform(df.values) + n_neighbors = min(15, df.values.shape[0] - 1) + info(f'Using {n_neighbors} neighbors for dimensional reduction') + if n_neighbors < 2: + warn('Using PCA instead of UMAP') + from sklearn.decomposition import PCA + pca = PCA(n_components=2) + xx = pca.fit_transform(df.values) else: - xx = UMAP(init=init, - n_components=2, - metric='cosine', - low_memory=True).fit_transform(df.values) + if have_gpu: + xx = cuUMAP(init=init, + n_components=2, + n_neighbors=n_neighbors, + min_dist=0.1, + metric='euclidean').fit_transform(df.values) + else: + xx = UMAP(init=init, + n_components=2, + n_neighbors=n_neighbors, + metric='cosine', + low_memory=True).fit_transform(df.values) df = pd.DataFrame({'x': xx[clustered, 0], 'y': xx[clustered, 1], 'labels': labels[clustered]}) p = sns.jointplot(data=df, x='x', y='y', hue='labels')