Skip to content

Commit 906609c

Browse files
Release notes (#74)
* Added release notes, updated docs * Updated embedder example notebooks (added example model in 'downloads') * add LOCAL flag to the embedder * Bugfix in Neo4j, added add_nodes_from_df and add_edges_from_df to PandasPGFrame
1 parent 8defd3b commit 906609c

File tree

10 files changed

+324
-163
lines changed

10 files changed

+324
-163
lines changed

README.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ BlueGraph supports Python versions >= 3.7 and pip >= 21.0.1. To update pip from
100100
pip install --upgrade pip wheel setuptools
101101

102102

103+
The stable version of BlueGraph can be installed from PyPI using:
104+
105+
::
106+
107+
pip install bluegraph
108+
109+
103110
The development version of BlueGraph can be installed from the source by cloning the current repository as follows:
104111

105112
::

bluegraph/backends/neo4j/io.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def _generate_property_repr(properties, prop_types=None):
7474
# create a string property
7575
quote = "'"
7676
props.append("{}: {}{}{}".format(
77-
k, quote,
77+
k.replace(".", "_"), quote,
7878
str(preprocess_value(v)).replace("'", "\\'"), quote))
7979
elif isinstance(v, Iterable):
8080
# create a list property
@@ -85,10 +85,12 @@ def _generate_property_repr(properties, prop_types=None):
8585
else:
8686
values.append("'{}'".format(preprocess_value(vv)))
8787
if len(values) > 0:
88-
props.append("{}: [{}]".format(k, ", ".join(values)))
88+
props.append("{}: [{}]".format(
89+
k.replace(".", "_"), ", ".join(values)))
8990
elif prop_types[k] == "numeric" and not math.isnan(v):
9091
# create a numerical property
91-
props.append("{}: {}".format(k, preprocess_value(v)))
92+
props.append("{}: {}".format(
93+
k.replace(".", "_"), preprocess_value(v)))
9294
return props
9395

9496

bluegraph/core/io.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -654,14 +654,53 @@ def add_nodes(self, node_ids):
654654
new_df = new_df.set_index("@id")
655655
self._nodes = self._nodes.append(new_df)
656656

657+
def add_nodes_from_df(self, df, id_column, node_type=None,
658+
property_types=None):
659+
"""Add nodes from a dataframe."""
660+
if property_types is None:
661+
property_types = {}
662+
new_nodes = df.rename(columns={id_column: "@id"}).set_index("@id")
663+
664+
if node_type:
665+
new_nodes["@type"] = node_type
666+
self._nodes = pd.concat([self._nodes, new_nodes])
667+
668+
for c in new_nodes.columns:
669+
if c in property_types:
670+
self._set_node_prop_type(c, property_types[c])
671+
else:
672+
self.node_prop_as_category(c)
673+
657674
def add_edges(self, edges):
675+
"""Add edge ids to the PG frame."""
658676
sources = [e[0] for e in edges]
659677
targets = [e[1] for e in edges]
660678

661679
new_df = pd.DataFrame({"@source_id": sources, "@target_id": targets})
662680
new_df = new_df.set_index(["@source_id", "@target_id"])
663681
self._edges = self._edges.append(new_df)
664682

683+
def add_edges_from_df(self, df, source_column, target_column,
684+
edge_type=None, property_types=None):
685+
"""Add edges from a dataframe."""
686+
if property_types is None:
687+
property_types = {}
688+
new_edges = df.rename(
689+
columns={
690+
source_column: "@source_id",
691+
target_column: "@target_id",
692+
}).set_index(["@source_id", "@target_id"])
693+
694+
if edge_type:
695+
new_edges["@type"] = edge_type
696+
self._edges = pd.concat([self._edges, new_edges])
697+
698+
for c in new_edges.columns:
699+
if c in property_types:
700+
self._set_edge_prop_type(c, property_types[c])
701+
else:
702+
self.edge_prop_as_category(c)
703+
665704
def add_node_types(self, type_dict):
666705
type_df = pd.DataFrame(
667706
type_dict.items(), columns=["@id", "@type"])
@@ -1131,12 +1170,30 @@ def remove_isolated_nodes(self):
11311170
# Remove nodes
11321171
self._nodes = self._nodes.loc[~self._nodes.index.isin(isolates)]
11331172

1134-
def to_json(self):
1173+
def to_json(self, node_id_key=None, node_type_key=None,
1174+
edge_id_keys=None, edge_type_key=None):
11351175
"""Covert to a JSON dictionary."""
1136-
nodes_json = self._nodes.reset_index().to_dict(
1137-
orient="records")
1138-
edges_json = self._edges.reset_index().to_dict(
1139-
orient="records")
1176+
nodes = self._nodes.reset_index()
1177+
if node_id_key is not None:
1178+
nodes = nodes.rename(columns={"@id": node_id_key})
1179+
if node_type_key is not None and "@type" in nodes.columns:
1180+
nodes = nodes.rename(columns={"@type": node_type_key})
1181+
nodes_json = [
1182+
r.dropna().to_dict() for _, r in nodes.iterrows()
1183+
]
1184+
1185+
edges = self._edges.reset_index()
1186+
if edge_id_keys is not None:
1187+
edges = edges.rename(columns={
1188+
"@source_id": edge_id_keys[0],
1189+
"@target_id": edge_id_keys[1],
1190+
})
1191+
if edge_type_key is not None and "@type" in edges.columns:
1192+
edges = edges.rename(columns={"@type": edge_type_key})
1193+
edges_json = [
1194+
r.dropna().to_dict() for _, r in edges.iterrows()
1195+
]
1196+
11401197
return {
11411198
"nodes": nodes_json,
11421199
"edges": edges_json,

docs/source/releases/v0.1.2.rst

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
====================
2+
v0.1.2 Release Notes
3+
====================
4+
5+
This release includes some major bug-fixes, several new features and API changes described below.
6+
7+
8+
Blue Graph's core
9+
=================
10+
11+
PGFrame
12+
-------
13+
14+
Updates to the :code:`PGFrame` interface include:
15+
16+
- Added methods:
17+
- :code:`rename_node_properties` and :code:`rename_edge_properties` for changing property names;
18+
- :code:`add_nodes_from_df` and :code:`add_edges_from_df` for adding nodes and edges using dataframes.
19+
- Added the :code:`from_ontology` classmethod for importing (e.g. from Webprotege) ontologies as property graphs.
20+
- Property values that are added to existing properties are now aggregated into sets (and not replaced as it was before).
21+
22+
23+
Backend support
24+
===============
25+
26+
graph-tool
27+
----------
28+
29+
Fixed a major bug occurring during node merging.
30+
31+
32+
Neo4j
33+
-----
34+
35+
Several minor bugfixes of the Neo4j backend were included in this release. In addition, the interface of :code:`pgframe_to_neo4j` has changed:
36+
37+
- :code:`NaN` properties are skipped;
38+
- Node types can be used as Neo4j node labels;
39+
- Edge types can be used as Neo4j edge relationship types: edges with multiple types result in multiple Neo4j relationships with respective types and their properties replicated (this behaviour is implemented due to the fact that Neo4j relationships can have exactly one relationship type).
40+
41+
42+
Graph preprocessing with BlueGraph
43+
==================================
44+
45+
46+
Semantic property encoding
47+
--------------------------
48+
49+
Updates to the encoders:
50+
51+
- :code:`Word2VecModel` is renamed to :code:`Doc2VecEncoder` and is inherited from :code:`bluegraph.downstream.Preprocessor`;
52+
- Wrapped scikit-learn's :code:`TfidfVectorizer` into :code:`TfIdfEncoder` inheriting :code:`bluegraph.downstream.Preprocessor`.
53+
54+
The above-mentioned changes allow using BlueGraph's encoders as a part of :code:`EmbeddingPipeline`.
55+
56+
57+
Downstream tasks with BlueGraph
58+
===============================
59+
60+
61+
Similarity API
62+
--------------
63+
64+
Similarity processor updates:
65+
66+
- Smarter handling of elements not existing in the index (when vectors or similar points are requested, :code:`None` is returned).
67+
- Segmented Faiss index can be initialized without vectors, the model can be trained on the first call to :code:`add`.
68+
69+
70+
71+
Embedding pipelines
72+
--------------------
73+
74+
Embedding pipeline updates:
75+
76+
- Added basic prediction interface (the :code:`run_prediction` method);
77+
- Modified :code:`get_similar_points` to be able to query similarity for the unknown vectors;
78+
- Optimized embedding pipeline memory usage: embedding table is not stored explicitly, but is a part of Faiss index.
79+
80+
81+
Services
82+
========
83+
84+
85+
Embedder
86+
--------
87+
88+
Embedder is a mini-service for retrieving embedding vectors and similar points distributed as a part of BlueGraph. A detailed description of the API can be found `here <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/api.yaml>`_. Two examples can be found in the `Embedder API for NCIt term embedding <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/examples/notebooks/Embedder%20API%20for%20NCIt%20term%20embedding.ipynb>`_ notebook and `Embedder API for node embedding <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/examples/notebooks/Embedder%20API%20for%20node%20embedding.ipynb>`_.
89+
90+
This release includes the following updates to the service:
91+
92+
- Embedder app can predict vectors for unseen points; the following formats can be passed as input:
93+
* :code:`raw`: raw data as is
94+
* :code:`json_pgframe`: a JSON representation of a PGFrame
95+
* :code:`nexus_dataset`: endpoint, bucket, resource id and a Nexus token (in the request header), fetches the dataset by resource ID, downloads it and creates a PGFrame (dataset is a JSON representation of a PGFrame).
96+
- API changes: the POST method for :code:`embedding/` and :code:`similar-points/` operates on unseen points;
97+
- Dockerfile fix (smaller image size), dockerignore updates
98+
- Embedder app can fetch local models from the directory (specified in the configs).

docs/source/services_api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Services API
33

44
This package contains a set of services distributed as a part of BlueGraph.
55

6+
67
BlueBrainEmbedder
78
-----------------
89

services/embedder/app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,16 @@ def _get_meta_data(model_name, file):
129129
app.config.from_pyfile('configs/app_config.py')
130130

131131

132-
try:
132+
if app.config["LOCAL"] is False:
133133
TOKEN = os.environ["NEXUS_TOKEN"]
134134
app.forge = KnowledgeGraphForge(
135135
app.config["FORGE_CONFIG"],
136136
token=TOKEN)
137-
except KeyError:
137+
else:
138138
app.forge = None
139139

140140
app.models = {}
141-
_retrieve_models()
141+
_retrieve_models(app.config["LOCAL"])
142142

143143
# --------------- Handlers ----------------
144144

services/embedder/configs/app_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@
1717
"""Embedding service configs."""
1818
FORGE_CONFIG = "configs/forge_configs/forge-config.yml"
1919
DOWNLOAD_DIR = "downloads/"
20+
LOCAL = True
2021

2122
DEBUG = True
704 KB
Binary file not shown.

0 commit comments

Comments
 (0)