-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add document interface to Graphql (#1272)
* re-enable vector tests * restructure vectors module * add empty graph test * remove embeddings module from raphtory-graphql * expose document interface through GraphQL * split generate_doc into different trait * make DocumentSource private * make EntityDocument public for the crate * expose document id as a vector of strings * fix compilation errors and warnings * ignore empty graph test in vectors module * fix tantivy bug * remove unused code
- Loading branch information
1 parent
a99719e
commit 82e3f2d
Showing
19 changed files
with
957 additions
and
980 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
use dynamic_graphql::SimpleObject; | ||
use raphtory::vectors::Document; | ||
|
||
#[derive(SimpleObject)] | ||
pub(crate) struct GqlDocument { | ||
/// Return a vector with the name of the node or the names of src and dst of the edge: [src, dst] | ||
name: Vec<String>, // size 1 for nodes, size 2 for edges: [src, dst] | ||
/// Return the type of entity: "node" or "edge" | ||
entity_type: String, | ||
content: String, | ||
} | ||
|
||
impl From<Document> for GqlDocument { | ||
fn from(value: Document) -> Self { | ||
match value { | ||
Document::Node { name, content } => Self { | ||
name: vec![name], | ||
entity_type: "node".to_owned(), | ||
content, | ||
}, | ||
Document::Edge { src, dst, content } => Self { | ||
name: vec![src, dst], | ||
entity_type: "edge".to_owned(), | ||
content, | ||
}, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use crate::{ | ||
db::graph::{edge::EdgeView, vertex::VertexView}, | ||
prelude::{EdgeViewOps, GraphViewOps, VertexViewOps}, | ||
vectors::{entity_id::EntityId, EntityDocument}, | ||
}; | ||
|
||
pub(crate) trait DocumentSource: Sized { | ||
fn generate_doc<T>(&self, template: &T) -> EntityDocument | ||
where | ||
T: Fn(&Self) -> String; | ||
} | ||
|
||
impl<G: GraphViewOps> DocumentSource for VertexView<G> { | ||
fn generate_doc<T>(&self, template: &T) -> EntityDocument | ||
where | ||
T: Fn(&Self) -> String, | ||
{ | ||
let raw_content = template(self); | ||
let content = match raw_content.char_indices().nth(1000) { | ||
Some((index, _)) => (&raw_content[..index]).to_owned(), | ||
None => raw_content, | ||
}; | ||
// TODO: allow multi document entities !!!!! | ||
// shortened to 1000 (around 250 tokens) to avoid exceeding the max number of tokens, | ||
// when embedding but also when inserting documents into prompts | ||
|
||
EntityDocument { | ||
id: EntityId::Node { id: self.id() }, | ||
content, | ||
} | ||
} | ||
} | ||
|
||
impl<G: GraphViewOps> DocumentSource for EdgeView<G> { | ||
fn generate_doc<T>(&self, template: &T) -> EntityDocument | ||
where | ||
T: Fn(&Self) -> String, | ||
{ | ||
let content = template(self); | ||
EntityDocument { | ||
id: EntityId::Edge { | ||
src: self.src().id(), | ||
dst: self.dst().id(), | ||
}, | ||
content, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
use crate::vectors::Embedding; | ||
use async_openai::{ | ||
types::{CreateEmbeddingRequest, EmbeddingInput}, | ||
Client, | ||
}; | ||
use itertools::Itertools; | ||
|
||
pub async fn openai_embedding(texts: Vec<String>) -> Vec<Embedding> { | ||
println!("computing embeddings for {} texts", texts.len()); | ||
let client = Client::new(); | ||
let request = CreateEmbeddingRequest { | ||
model: "text-embedding-ada-002".to_owned(), | ||
input: EmbeddingInput::StringArray(texts), | ||
user: None, | ||
}; | ||
let response = client.embeddings().create(request).await.unwrap(); | ||
println!("Generated embeddings successfully"); | ||
response.data.into_iter().map(|e| e.embedding).collect_vec() | ||
} | ||
|
||
// this is currently commented out so we don't need to add any new dependencies | ||
// but might be potentially useful in the future | ||
// async fn sentence_transformers_embeddings(texts: Vec<String>) -> Vec<Embedding> { | ||
// println!("computing embeddings for {} texts", texts.len()); | ||
// Python::with_gil(|py| { | ||
// let sentence_transformers = py.import("sentence_transformers")?; | ||
// let locals = [("sentence_transformers", sentence_transformers)].into_py_dict(py); | ||
// locals.set_item("texts", texts); | ||
// | ||
// let pyarray: &PyArray2<f32> = py | ||
// .eval( | ||
// &format!( | ||
// "sentence_transformers.SentenceTransformer('thenlper/gte-small').encode(texts)" | ||
// ), | ||
// Some(locals), | ||
// None, | ||
// )? | ||
// .extract()?; | ||
// | ||
// let readonly = pyarray.readonly(); | ||
// let chunks = readonly.as_slice().unwrap().chunks(384).into_iter(); | ||
// let embeddings = chunks | ||
// .map(|chunk| chunk.iter().copied().collect_vec()) | ||
// .collect_vec(); | ||
// | ||
// Ok::<Vec<Vec<f32>>, Box<dyn std::error::Error>>(embeddings) | ||
// }) | ||
// .unwrap() | ||
// } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
use crate::{ | ||
db::graph::{edge::EdgeView, vertex::VertexView}, | ||
prelude::{EdgeViewOps, GraphViewOps, VertexViewOps}, | ||
}; | ||
use serde::Serializer; | ||
use std::fmt::{Display, Formatter}; | ||
|
||
/// Identifies a graph entity: either a single node or an edge.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) enum EntityId {
    /// A node, referenced by its numeric id.
    Node { id: u64 },
    /// An edge, referenced by the numeric ids of its source and destination.
    Edge { src: u64, dst: u64 },
}
|
||
impl<G: GraphViewOps> From<&VertexView<G>> for EntityId { | ||
fn from(value: &VertexView<G>) -> Self { | ||
EntityId::Node { id: value.id() } | ||
} | ||
} | ||
|
||
impl<G: GraphViewOps> From<VertexView<G>> for EntityId { | ||
fn from(value: VertexView<G>) -> Self { | ||
EntityId::Node { id: value.id() } | ||
} | ||
} | ||
|
||
impl<G: GraphViewOps> From<&EdgeView<G>> for EntityId { | ||
fn from(value: &EdgeView<G>) -> Self { | ||
EntityId::Edge { | ||
src: value.src().id(), | ||
dst: value.dst().id(), | ||
} | ||
} | ||
} | ||
|
||
impl<G: GraphViewOps> From<EdgeView<G>> for EntityId { | ||
fn from(value: EdgeView<G>) -> Self { | ||
EntityId::Edge { | ||
src: value.src().id(), | ||
dst: value.dst().id(), | ||
} | ||
} | ||
} | ||
|
||
impl Display for EntityId { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||
match self { | ||
EntityId::Node { id } => f.serialize_u64(*id), | ||
EntityId::Edge { src, dst } => { | ||
f.serialize_u64(*src) | ||
.expect("src ID couldn't be serialized"); | ||
f.write_str("-") | ||
.expect("edge ID separator couldn't be serialized"); | ||
f.serialize_u64(*dst) | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.