Embedding doc (#424)
* Use a more lightweight method to compute PCA and T-SNE. Update the embedding documentation. Update the loading animation

* Reveal the embedding tab

* Add auto reload and fix typos

* Update comments
jetfuel authored Apr 26, 2018
1 parent c1c2232 commit 2642aab
Showing 8 changed files with 289 additions and 28 deletions.
12 changes: 5 additions & 7 deletions frontend/src/common/component/AppMenu.vue
@@ -63,13 +63,11 @@ export default {
title: 'TEXTS',
name: 'texts',
},
/* // Hide the top menu
{
url: '/HighDimensional',
title: 'HighDimensional',
name: 'HighDimensional'
}
*/
{
url: '/HighDimensional',
title: 'HighDimensional',
name: 'HighDimensional',
},
],
};
},
30 changes: 29 additions & 1 deletion frontend/src/high-dimensional/HighDimensional.vue
@@ -7,6 +7,7 @@
:search-text="config.searchText"
:dimension="config.dimension"
:embedding-data="embeddingData"
:show-loading="showLoading"
/>
</div>
<div class="visual-dl-page-right">
@@ -26,6 +27,9 @@ import autoAdjustHeight from '../common/util/autoAdjustHeight';
import Config from './ui/Config';
import Chart from './ui/Chart';
// the interval, in seconds, at which to refresh chart data
const intervalTime = 30;
export default {
components: {
'ui-config': Config,
@@ -39,11 +43,12 @@ export default {
searchText: '',
displayWordLabel: true,
dimension: '2',
reduction: 'tsne',
reduction: 'pca',
selectedRun: '',
running: true,
},
embeddingData: [],
showLoading: false,
};
},
created() {
@@ -55,6 +60,13 @@
this.config.selectedRun = data[0];
}
});
if (this.config.running) {
this.startInterval();
}
},
beforeDestroy() {
this.stopInterval();
},
watch: {
'config.dimension': function(val) {
@@ -66,6 +78,9 @@
'config.selectedRun': function(val) {
this.fetchDatasets();
},
'config.running': function(val) {
val ? this.startInterval() : this.stopInterval();
},
},
mounted() {
autoAdjustHeight();
@@ -82,14 +97,27 @@
},
},
methods: {
stopInterval() {
clearInterval(this.getOriginDataInterval);
},
// fetch the origin data every {{intervalTime}} seconds
startInterval() {
this.getOriginDataInterval = setInterval(() => {
this.fetchDatasets();
}, intervalTime * 1000);
},
fetchDatasets() {
this.showLoading = true;
// Fetch the data from the server, passing the dimension and reduction method
let params = {
dimension: this.config.dimension,
reduction: this.config.reduction,
run: this.config.selectedRun,
};
getHighDimensionalDatasets(params).then(({errno, data}) => {
this.showLoading = false;
let vectorData = data.embedding;
let labels = data.labels;
16 changes: 11 additions & 5 deletions frontend/src/high-dimensional/ui/Chart.vue
@@ -35,6 +35,10 @@ export default {
type: String,
required: true,
},
showLoading: {
type: Boolean,
required: true,
},
},
data() {
return {
@@ -53,15 +57,11 @@
created() {},
mounted() {
this.createChart();
this.myChart.showLoading();
this.set2DChartOptions();
this.setDisplayWordLabel();
},
watch: {
embeddingData: function(val) {
this.myChart.hideLoading();
// Got new data; pass it to the filter function to render the 'matched' and 'not matched' sets
this.filterSeriesDataAndSetOption(this.searchText);
},
@@ -70,7 +70,6 @@
},
dimension: function(val) {
this.myChart.clear();
this.myChart.showLoading();
if (val === '2') {
this.set2DChartOptions();
this.setDisplayWordLabel();
@@ -82,6 +81,13 @@
searchText: function(val) {
this.filterSeriesDataAndSetOption(val);
},
showLoading: function(val) {
if (val) {
this.myChart.showLoading();
} else {
this.myChart.hideLoading();
}
},
},
methods: {
createChart() {
7 changes: 4 additions & 3 deletions frontend/src/high-dimensional/ui/Config.vue
@@ -31,12 +31,13 @@
label="Reduction Method"
v-model="config.reduction"
dark>
<v-radio
label="T-SNE"
value="tsne"/>
<v-radio
label="PCA"
value="pca"/>
<v-radio
label="T-SNE"
value="tsne"/>

</v-radio-group>

<v-radio-group
15 changes: 12 additions & 3 deletions visualdl/logic/pybind.cc
@@ -253,10 +253,19 @@ PYBIND11_MODULE(core, m) {
.def("total_records", &cp::TextReader::total_records)
.def("size", &cp::TextReader::size);

py::class_<cp::Embedding>(m, "EmbeddingWriter")
py::class_<cp::Embedding>(m, "EmbeddingWriter", R"pbdoc(
PyBind class. Must instantiate through the LogWriter.
)pbdoc")
.def("set_caption", &cp::Embedding::SetCaption)
.def("add_embeddings_with_word_list",
&cp::Embedding::AddEmbeddingsWithWordList);
.def("add_embeddings_with_word_list",
&cp::Embedding::AddEmbeddingsWithWordList,
R"pbdoc(
Add an embedding record. Each run can only store one set of embedding data.
:param embedding: embedding vectors of the words
:type embedding: list
)pbdoc");

py::class_<cp::EmbeddingReader>(m, "EmbeddingReader")
.def("get_all_labels", &cp::EmbeddingReader::get_all_labels)
13 changes: 13 additions & 0 deletions visualdl/python/storage.py
@@ -143,6 +143,9 @@ def text(self, tag):
return self.reader.get_text(tag)

def embedding(self):
"""
Get the embedding reader.
"""
return self.reader.get_embedding(EMBEDDING_TAG)

def audio(self, tag):
@@ -292,9 +295,19 @@ def text(self, tag):
return self.writer.new_text(tag)

def embedding(self):
"""
Create an embedding writer that is used to write
embedding data.
:return: An embedding writer to record embedding data
:rtype: EmbeddingWriter
"""
return self.writer.new_embedding(EMBEDDING_TAG)

def save(self):
"""
Force VisualDL to sync with the file system.
"""
self.writer.save()

def __enter__(self):
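For orientation, here is a minimal sketch of how the writer side documented above might be used. It assumes the standard LogWriter entry point with a mode() context manager, and that the object returned by embedding() exposes the PyBind methods registered in pybind.cc; the log directory, sync cycle, and data shapes are illustrative.

from visualdl import LogWriter

# Hypothetical log directory and sync cycle.
log_writer = LogWriter("./log", sync_cycle=30)

with log_writer.mode("train") as logger:
    # Returns the EmbeddingWriter bound in pybind.cc.
    embedding_writer = logger.embedding()

    # Illustrative data: one embedding vector per word.
    word_list = ["apple", "banana", "cherry"]
    embeddings = [[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0],
                  [7.0, 8.0, 9.0]]
    embedding_writer.add_embeddings_with_word_list(embeddings, word_list)

# Optionally force a sync with the file system, per the save() docstring above.
log_writer.save()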
40 changes: 31 additions & 9 deletions visualdl/server/lib.py
@@ -307,19 +307,18 @@ def get_embeddings(storage, mode, reduction, dimension=2, num_records=5000):
with storage.mode(mode) as reader:
embedding = reader.embedding()
labels = embedding.get_all_labels()
high_dimensional_vectors = embedding.get_all_embeddings()
high_dimensional_vectors = np.array(embedding.get_all_embeddings())

# TODO: Move away from sklearn
if reduction == 'tsne':
from sklearn.manifold import TSNE
tsne = TSNE(
perplexity=30, n_components=dimension, init='pca', n_iter=5000)
low_dim_embs = tsne.fit_transform(high_dimensional_vectors)
import tsne
low_dim_embs = tsne.tsne(
high_dimensional_vectors,
dimension,
initial_dims=50,
perplexity=30.0)

elif reduction == 'pca':
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
low_dim_embs = pca.fit_transform(high_dimensional_vectors)
low_dim_embs = simple_pca(high_dimensional_vectors, dimension)

return {"embedding": low_dim_embs.tolist(), "labels": labels}

@@ -393,3 +392,26 @@ def _handler(key, func, *args, **kwargs):
return data

return _handler


def simple_pca(x, dimension):
"""
A simple PCA implementation for dimensionality reduction.
"""

# Center the data.
x -= np.mean(x, axis=0)

# Compute the covariance matrix
cov = np.cov(x, rowvar=False)

# Get eigenvectors and eigenvalues from the covariance matrix
eigvals, eigvecs = np.linalg.eig(cov)

# Sort the eigenvalues from high to low
order = np.argsort(eigvals)[::-1]

# Keep only the eigenvectors corresponding to the top `dimension` eigenvalues
eigvecs = eigvecs[:, order[:dimension]]

return np.dot(x, eigvecs)
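A small sanity-check sketch of simple_pca on synthetic data (the array shapes are illustrative):

import numpy as np

# 100 hypothetical embedding vectors of dimension 50.
vectors = np.random.rand(100, 50)

low_dim = simple_pca(vectors, 2)
print(low_dim.shape)  # (100, 2)

Note that the centering step (x -= np.mean(x, axis=0)) modifies the input array in place; this is harmless in get_embeddings, which passes a freshly created np.array(...), but callers sharing the array would see it mutated. Since the covariance matrix is symmetric, np.linalg.eigh would be a natural alternative to np.linalg.eig that guarantees real-valued eigenvalues and eigenvectors.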