Skip to content

Commit

Permalink
Merge pull request #9 from elastic/query_layer
Browse files Browse the repository at this point in the history
Query layer
  • Loading branch information
comath committed Jul 20, 2020
2 parents 8bc5770 + 08385c8 commit ead889a
Show file tree
Hide file tree
Showing 41 changed files with 364 additions and 184 deletions.
5 changes: 3 additions & 2 deletions goko/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "goko"
version = "0.3.2"
version = "0.3.3"
edition = "2018"

description = "A lock-free, eventually consistent, concurrent covertree."
Expand Down Expand Up @@ -34,8 +34,9 @@ protobuf = "2.16.2"
rand = "0.7.3"
yaml-rust = "0.4.4"
pbr = "1.0.3"
fxhash = "0.2.1"
rayon = "1.3.1"
indexmap = {version = "1.4.0", features = ["serde-1"]}
hashbrown = { version = "0.8", features = ["rayon"] }
crossbeam-channel = "0.4.2"
pointcloud = { version = "0.3.0", path = "../pointcloud" }
#evmap = { git = "https://github.com/comath/rust-evmap" }
Expand Down
4 changes: 2 additions & 2 deletions goko/examples/ember_sequence_track.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ fn test_run(
for i in 0..count {
let point_index: usize = rng.gen_range(0, test_set.len());
let point = test_set.point(point_index).unwrap();
let trace = tracker.tree_reader().dry_insert(point).unwrap();
tracker.add_dry_insert(trace);
let trace = tracker.tree_reader().path(point).unwrap();
tracker.add_path(trace);
if i % 5 == 0 {
stats.push(tracker.current_stats());
}
Expand Down
11 changes: 7 additions & 4 deletions goko/src/builders.rs → goko/src/covertree/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

use crate::plugins::TreePluginSet;
use crate::*;
use data_caches::*;
use layer::*;
use node::*;
use super::*;
use super::data_caches::*;
use super::layer::*;
use super::node::*;
use pbr::ProgressBar;
use std::fs::read_to_string;
//use pointcloud::*;
use std::cmp::{max, min};
use std::path::Path;
use std::sync::{atomic, Arc, RwLock};
Expand Down Expand Up @@ -290,10 +290,13 @@ impl CoverTreeBuilder {
pb.format("╢▌▌░╟");
}

let (_final_addresses_reader, final_addresses) = monomap::new();

let mut cover_tree = CoverTreeWriter {
parameters: Arc::clone(&parameters),
layers,
root_address,
final_addresses,
};

let mut inserted_nodes: usize = 0;
Expand Down
File renamed without changes.
12 changes: 6 additions & 6 deletions goko/src/layer.rs → goko/src/covertree/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@
//! There is also an experimental pair of cluster hashmaps, which need to be replaced by a data structure that
//! respects and represents the nerve more.

use crate::evmap::monomap::{MonoReadHandle, MonoWriteHandle};
use crate::monomap::{MonoReadHandle, MonoWriteHandle};
use pointcloud::*;

//use rayon;
use super::*;
use node::*;
use crate::*;
use super::node::*;
use std::iter::FromIterator;
use tree_file_format::*;
use crate::tree_file_format::*;

/// Actual reader, primarily contains a read head to the hash-map.
/// This also contains a reference to the scale_index so that it is easy to save and load. It is largely redundant,
Expand Down Expand Up @@ -155,7 +155,7 @@ impl<D: PointCloud> CoverLayerWriter<D> {

/// Constructs the object. To construct a reader call `reader`.
pub(crate) fn new(scale_index: i32) -> CoverLayerWriter<D> {
let (_node_reader, node_writer) = evmap::monomap::new();
let (_node_reader, node_writer) = monomap::new();
CoverLayerWriter {
scale_index,
node_writer,
Expand All @@ -171,7 +171,7 @@ impl<D: PointCloud> CoverLayerWriter<D> {

pub(crate) fn load(layer_proto: &LayerProto) -> CoverLayerWriter<D> {
let scale_index = layer_proto.get_scale_index();
let (_node_reader, mut node_writer) = evmap::monomap::new();
let (_node_reader, mut node_writer) = monomap::new();
for node_proto in layer_proto.get_nodes() {
let index = node_proto.get_center_index() as PointIndex;
let node = CoverNode::load(node_proto);
Expand Down
12 changes: 12 additions & 0 deletions goko/src/covertree/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@


pub(crate) mod data_caches;
pub(crate) mod builders;
pub mod layer;
pub mod node;
pub mod query_tools;

mod tree;

pub use builders::CoverTreeBuilder;
pub use tree::*;
4 changes: 2 additions & 2 deletions goko/src/node.rs → goko/src/covertree/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
//!
use crate::errors::{GokoError, GokoResult};
use crate::plugins::{NodePlugin, NodePluginSet, labels::NodeLabelSummary};
use crate::query_tools::{RoutingQueryHeap, SingletonQueryHeap};
use super::query_tools::{RoutingQueryHeap, SingletonQueryHeap};
use crate::tree_file_format::*;
use crate::NodeAddress;

Expand Down Expand Up @@ -453,7 +453,7 @@ mod tests {
use crate::query_tools::knn_query_heap::tests::clone_unvisited_nodes;
use crate::query_tools::query_items::QueryAddress;
use crate::query_tools::KnnQueryHeap;
use crate::tree::tests::build_mnist_tree;
use crate::covertree::tests::build_mnist_tree;

fn create_test_node<D: PointCloud>() -> CoverNode<D> {
let children = Some(NodeChildren {
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
69 changes: 58 additions & 11 deletions goko/src/tree.rs → goko/src/covertree/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,17 @@
//! The hashmap pair idea is in `layer` and originally comes from Jon Gjengset.

use crate::*;
use layer::*;
use node::*;
use super::layer::*;
use super::node::*;
//use pointcloud::*;

use std::sync::{atomic, Arc, RwLock};
use tree_file_format::*;
use crate::tree_file_format::*;
use crate::monomap::{MonoReadHandle, MonoWriteHandle};

use crate::plugins::{GokoPlugin, TreePluginSet};
use crate::query_tools::{KnnQueryHeap, MultiscaleQueryHeap, RoutingQueryHeap};
use errors::GokoResult;
use super::query_tools::{KnnQueryHeap, MultiscaleQueryHeap, RoutingQueryHeap};
use errors::{GokoResult,GokoError};
use std::collections::HashMap;
use std::iter::Iterator;
use std::iter::Rev;
Expand Down Expand Up @@ -102,16 +103,17 @@ pub struct CoverTreeReader<D: PointCloud> {
parameters: Arc<CoverTreeParameters<D>>,
layers: Vec<CoverLayerReader<D>>,
root_address: NodeAddress,
final_addresses: MonoReadHandle<PointIndex,NodeAddress>,
}



impl<D: PointCloud> Clone for CoverTreeReader<D> {
fn clone(&self) -> CoverTreeReader<D> {
CoverTreeReader {
parameters: self.parameters.clone(),
layers: self.layers.clone(),
root_address: self.root_address,
final_addresses: self.final_addresses.clone(),
}
}
}
Expand Down Expand Up @@ -367,7 +369,7 @@ impl<D: PointCloud> CoverTreeReader<D> {
}

/// # Dry Insert Query
pub fn dry_insert<'a, T: Into<PointRef<'a>>>(
pub fn path<'a, T: Into<PointRef<'a>>>(
&self,
point: T,
) -> GokoResult<Vec<(f32, NodeAddress)>> {
Expand Down Expand Up @@ -395,6 +397,22 @@ impl<D: PointCloud> CoverTreeReader<D> {
Ok(trace)
}

///
pub fn known_path(&self, point_index: PointIndex) -> GokoResult<Vec<(f32,NodeAddress)>> {
self.final_addresses.get_and(&point_index,|addr| {
let mut path = Vec::with_capacity((self.root_address().0 - addr.0) as usize);
let mut parent = Some(*addr);
while let Some(addr) = parent {
path.push(addr);
parent = self.get_node_and(addr,|n| n.parent_address()).flatten();
}
(&mut path[..]).reverse();
let point_indexes: Vec<PointIndex> = path.iter().map(|na| na.1).collect();
let dists = self.parameters.point_cloud.distances_to_point_index(point_index,&point_indexes[..]).unwrap();
dists.iter().zip(path).map(|(d,a)|(*d,a)).collect()
}).ok_or(GokoError::IndexNotInTree(point_index))
}

///Computes the fractal dimension of a node
pub fn node_fractal_dim(&self, node_address: NodeAddress) -> f32 {
let count: f32 = self
Expand Down Expand Up @@ -517,6 +535,7 @@ pub struct CoverTreeWriter<D: PointCloud> {
pub(crate) parameters: Arc<CoverTreeParameters<D>>,
pub(crate) layers: Vec<CoverLayerWriter<D>>,
pub(crate) root_address: NodeAddress,
pub(crate) final_addresses: MonoWriteHandle<PointIndex,NodeAddress>,
}

impl<D: PointCloud + LabeledCloud> CoverTreeWriter<D> {
Expand Down Expand Up @@ -564,6 +583,7 @@ impl<D: PointCloud> CoverTreeWriter<D> {
parameters: Arc::clone(&self.parameters),
layers: self.layers.iter().map(|l| l.reader()).collect(),
root_address: self.root_address,
final_addresses: self.final_addresses.factory().handle(),
}
}

Expand Down Expand Up @@ -598,11 +618,38 @@ impl<D: PointCloud> CoverTreeWriter<D> {
.map(|l| CoverLayerWriter::load(l))
.collect();

Ok(CoverTreeWriter {
let (_final_addresses_reader, final_addresses) = monomap::new();

let mut tree = CoverTreeWriter {
parameters,
layers,
root_address,
})
final_addresses,
};

tree.refresh_final_indexes();

Ok(tree)
}

fn refresh_final_indexes(&mut self) {
let reader = self.reader();
let mut unvisited_nodes: Vec<NodeAddress> = vec![self.root_address];
while !unvisited_nodes.is_empty() {
let cur_add = unvisited_nodes.pop().unwrap();
reader.get_node_and(cur_add, |n| {
for singleton in n.singletons() {
self.final_addresses.insert(*singleton,cur_add);
}
if let Some((nested_si,child_addresses)) = n.children() {
unvisited_nodes.extend(child_addresses);
unvisited_nodes.push((nested_si,cur_add.1));
}
}).unwrap();
}

self.final_addresses.refresh();
self.final_addresses.refresh();
}

/// Encodes the tree into a protobuf. See `utils::save_tree` for saving to a file on disk.
Expand Down Expand Up @@ -731,10 +778,10 @@ pub(crate) mod tests {
}

#[test]
fn dry_insert_sanity() {
fn path_sanity() {
let writer = build_basic_tree();
let reader = writer.reader();
let trace = reader.dry_insert(&[0.495f32][..]).unwrap();
let trace = reader.path(&[0.495f32][..]).unwrap();
assert!(trace.len() == 4 || trace.len() == 3);
println!("{:?}", trace);
for i in 0..(trace.len() - 1) {
Expand Down
10 changes: 6 additions & 4 deletions goko/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use std::fmt;
use std::io;
use std::str;

use crate::PointIndex;

/// Helper type for a call that could go wrong.
pub type GokoResult<T> = Result<T, GokoError>;

Expand All @@ -36,7 +38,7 @@ pub enum GokoError {
/// Unable to retrieve some data point (given by index) in a file (slice name)
PointCloudError(PointCloudError),
/// Most common error, the given point name isn't present in the training data
NameNotInTree(String),
IndexNotInTree(PointIndex),
/// IO error when opening files
IoError(io::Error),
/// Parsing error when loading a CSV file
Expand All @@ -54,7 +56,7 @@ impl fmt::Display for GokoError {
GokoError::IoError(ref e) => write!(f, "{}", e),
GokoError::ParsingError(ref e) => write!(f, "{}", e),
GokoError::PointCloudError(ref e) => write!(f, "{}", e),
GokoError::NameNotInTree { .. } => {
GokoError::IndexNotInTree { .. } => {
write!(f, "there was an issue grabbing a name from the known names")
}
GokoError::DoubleNest => write!(
Expand All @@ -77,7 +79,7 @@ impl Error for GokoError {
GokoError::IoError(ref e) => e.description(),
GokoError::ParsingError(ref e) => e.description(),
GokoError::PointCloudError(ref e) => e.description(),
GokoError::NameNotInTree { .. } => {
GokoError::IndexNotInTree { .. } => {
"there was an issue grabbing a name from the known names"
}
GokoError::DoubleNest => {
Expand All @@ -94,7 +96,7 @@ impl Error for GokoError {
GokoError::IoError(ref e) => Some(e),
GokoError::ParsingError(ref e) => Some(e),
GokoError::PointCloudError(ref e) => Some(e),
GokoError::NameNotInTree { .. } => None,
GokoError::IndexNotInTree { .. } => None,
GokoError::DoubleNest => None,
GokoError::InsertBeforeNest => None,
}
Expand Down
30 changes: 0 additions & 30 deletions goko/src/evmap/mod.rs

This file was deleted.

16 changes: 7 additions & 9 deletions goko/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#![warn(missing_docs)]
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
#![feature(binary_heap_into_iter_sorted)]
#![feature(iterator_fold_self)]

//! # Goko
//! This is an lock-free efficient implementation of a covertree for data science. The traditional
Expand Down Expand Up @@ -60,21 +61,18 @@ use pointcloud::*;
pub mod errors;
pub use errors::GokoResult;

pub(crate) mod evmap;
pub(crate) mod monomap;

mod covertree;
pub use covertree::*;

pub mod query_interface;

mod builders;
mod data_caches;
pub mod layer;
pub mod node;
pub mod query_tools;
mod tree;
mod tree_file_format;
pub mod utils;

pub mod plugins;

pub use builders::CoverTreeBuilder;
pub use tree::*;

/// The data structure explicitly seperates the covertree by layer, and the addressing schema for nodes
/// is a pair for the layer index and the center point index of that node.
Expand Down
File renamed without changes.
Loading

0 comments on commit ead889a

Please sign in to comment.