-
Notifications
You must be signed in to change notification settings - Fork 629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Embedding doc #424
Embedding doc #424
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,6 +143,9 @@ def text(self, tag): | |
return self.reader.get_text(tag) | ||
|
||
def embedding(self): | ||
""" | ||
Get the embedding reader. | ||
""" | ||
return self.reader.get_embedding(EMBEDDING_TAG) | ||
|
||
def audio(self, tag): | ||
|
@@ -292,9 +295,19 @@ def text(self, tag): | |
return self.writer.new_text(tag) | ||
|
||
def embedding(self): | ||
""" | ||
Create an embedding writer that is used to write | ||
embedding data. | ||
|
||
:return: An embedding writer to record embedding data
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An embedding |
||
:rtype: embeddingWriter | ||
""" | ||
return self.writer.new_embedding(EMBEDDING_TAG) | ||
|
||
def save(self): | ||
""" | ||
Force the VisualDL to sync with the file system. | ||
""" | ||
self.writer.save() | ||
|
||
def __enter__(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -307,19 +307,18 @@ def get_embeddings(storage, mode, reduction, dimension=2, num_records=5000): | |
with storage.mode(mode) as reader: | ||
embedding = reader.embedding() | ||
labels = embedding.get_all_labels() | ||
high_dimensional_vectors = embedding.get_all_embeddings() | ||
high_dimensional_vectors = np.array(embedding.get_all_embeddings()) | ||
|
||
# TODO: Move away from sklearn | ||
if reduction == 'tsne': | ||
from sklearn.manifold import TSNE | ||
tsne = TSNE( | ||
perplexity=30, n_components=dimension, init='pca', n_iter=5000) | ||
low_dim_embs = tsne.fit_transform(high_dimensional_vectors) | ||
import tsne | ||
low_dim_embs = tsne.tsne( | ||
high_dimensional_vectors, | ||
dimension, | ||
initial_dims=50, | ||
perplexity=30.0) | ||
|
||
elif reduction == 'pca': | ||
from sklearn.decomposition import PCA | ||
pca = PCA(n_components=3) | ||
low_dim_embs = pca.fit_transform(high_dimensional_vectors) | ||
low_dim_embs = simple_pca(high_dimensional_vectors, dimension) | ||
|
||
return {"embedding": low_dim_embs.tolist(), "labels": labels} | ||
|
||
|
@@ -393,3 +392,23 @@ def _handler(key, func, *args, **kwargs): | |
return data | ||
|
||
return _handler | ||
|
||
|
||
# A simple PCA implementation to do the dimension reduction. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Normally we comment methods like this:
|
||
def simple_pca(x, dimension): | ||
# Center the data. | ||
x -= np.mean(x, axis=0) | ||
|
||
# Computing the Covariance Matrix | ||
cov = np.cov(x, rowvar=False) | ||
|
||
# Get eigenvectors and eigenvalues from the covariance matrix | ||
eigvals, eigvecs = np.linalg.eig(cov) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What a math guy, Jeff! But do we need to do it ourselves? Moreover, SVD is more stable than eigenvectors in terms of computing principal components. Check here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason behind it is so we don't have to import another pip package to do the calculation. |
||
|
||
# Sort the eigvals from high to low | ||
order = np.argsort(eigvals)[::-1] | ||
|
||
# Drop the eigenvectors with low eigenvalues | ||
eigvecs = eigvecs[:, order[:dimension]] | ||
|
||
return np.dot(x, eigvecs) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is used to