Skip to content

Commit 5471399

Browse files
adding current progress
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
1 parent 3a29aaa commit 5471399

File tree

3 files changed

+35
-41
lines changed

3 files changed

+35
-41
lines changed

docs/reference/alpha-vector-database.md

+12
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,16 @@ def print_online_features(features):
108108
print(key, " : ", value)
109109

110110
print_online_features(features)
111+
```
112+
113+
### Configuration
114+
We offer two Online Store options for Vector Databases: PGVector and SQLite.
115+
116+
#### Installation with SQLite
117+
If you are using `pyenv` to manage your Python versions, you can build a Python installation that supports loadable SQLite extensions (required for the SQLite vector search extension) with the following command:
118+
```bash
119+
PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
120+
LDFLAGS="-L/opt/homebrew/opt/sqlite/lib" \
121+
CPPFLAGS="-I/opt/homebrew/opt/sqlite/include" \
122+
pyenv install
111123
```

sdk/python/feast/infra/online_stores/sqlite.py

+10-22
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
# limitations under the License.
1414
import itertools
1515
import os
16+
import json
1617
import sqlite3
18+
import sqlite_vss
1719
from datetime import datetime
1820
from pathlib import Path
1921
from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple
2022

21-
import sqlite_vss
2223
from pydantic import StrictStr
2324

2425
from feast import Entity
@@ -110,9 +111,9 @@ def online_write_batch(
110111
created_ts = to_naive_utc(created_ts)
111112

112113
for feature_name, val in values.items():
113-
vector_val = None
114114
if config.online_store.vss_enabled:
115-
vector_val = self._get_list_val_str(val)
115+
print('using vector search')
116+
vector_val = json.dumps(val)
116117
conn.execute(
117118
f"""
118119
UPDATE {_table_id(project, table)}
@@ -121,7 +122,7 @@ def online_write_batch(
121122
""",
122123
(
123124
# SET
124-
val.SerializeToString(),
125+
str(val),
125126
vector_val,
126127
timestamp,
127128
created_ts,
@@ -138,13 +139,14 @@ def online_write_batch(
138139
(
139140
entity_key_bin,
140141
feature_name,
141-
val.SerializeToString(),
142+
str(val),
142143
vector_val,
143144
timestamp,
144145
created_ts,
145146
),
146147
)
147148
else:
149+
print('not using vector search')
148150
conn.execute(
149151
f"""
150152
UPDATE {_table_id(project, table)}
@@ -243,8 +245,8 @@ def update(
243245
project = config.project
244246

245247
for table in tables_to_keep:
246-
conn.execute(
247-
f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))"
248+
self.conn.execute(
249+
f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))"
248250
)
249251
conn.execute(
250252
f"CREATE INDEX IF NOT EXISTS {_table_id(project, table)}_ek ON {_table_id(project, table)} (entity_key);"
@@ -282,20 +284,6 @@ def teardown(
282284
except FileNotFoundError:
283285
pass
284286

285-
def _get_list_val_str(self, val: ValueProto) -> str:
286-
if val.HasField("string_list_val"):
287-
return ",".join(val.string_list_val.val)
288-
elif val.HasField("bytes_list_val"):
289-
return ",".join(map(str, val.bytes_list_val.val))
290-
elif val.HasField("int64_list_val"):
291-
return ",".join(map(str, val.int64_list_val.val))
292-
elif val.HasField("float_list_val"):
293-
return ",".join(map(str, val.float_list_val.val))
294-
elif val.HasField("double_list_val"):
295-
return ",".join(map(str, val.double_list_val.val))
296-
else:
297-
raise ValueError("Unsupported list value type")
298-
299287
def retrieve_online_documents(
300288
self,
301289
config: RepoConfig,
@@ -434,7 +422,7 @@ def from_proto(sqlite_table_proto: SqliteTableProto) -> Any:
434422

435423
def update(self):
436424
self.conn.execute(
437-
f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))"
425+
f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))"
438426
)
439427
self.conn.execute(
440428
f"CREATE INDEX IF NOT EXISTS {self.name}_ek ON {self.name} (entity_key);"

sdk/python/tests/unit/online_store/test_online_retrieval.py

+13-19
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ def test_get_online_documents() -> None:
426426
with runner.local_repo(
427427
get_example_repo("example_feature_repo_1.py"), "file"
428428
) as store:
429+
store.config.online_store.vss_enabled = True
429430
# Write some data to two tables
430431
document_embeddings_fv = store.get_feature_view(name="document_embeddings")
431432

@@ -434,15 +435,11 @@ def test_get_online_documents() -> None:
434435
item_key = EntityKeyProto(
435436
join_keys=["item_id"], entity_values=[ValueProto(int64_val=0)]
436437
)
437-
provider.online_write_batch(
438-
config=store.config,
439-
table=document_embeddings_fv,
440-
data=[
438+
data = [
441439
(
442440
item_key,
443441
{
444442
"Embeddings": [
445-
np.array(
446443
[
447444
0.17517076,
448445
-0.1259909,
@@ -454,9 +451,7 @@ def test_get_online_documents() -> None:
454451
0.01173803,
455452
-0.0573408,
456453
0.02616226,
457-
]
458-
),
459-
np.array(
454+
],
460455
[
461456
0.18517076,
462457
-0.1259909,
@@ -468,9 +463,7 @@ def test_get_online_documents() -> None:
468463
0.01173803,
469464
-0.0573408,
470465
0.02616226,
471-
]
472-
),
473-
np.array(
466+
],
474467
[
475468
0.19517076,
476469
-0.1259909,
@@ -482,9 +475,7 @@ def test_get_online_documents() -> None:
482475
0.01173803,
483476
-0.0573408,
484477
0.02616226,
485-
]
486-
),
487-
np.array(
478+
],
488479
[
489480
0.20517076,
490481
-0.1259909,
@@ -497,17 +488,20 @@ def test_get_online_documents() -> None:
497488
-0.0573408,
498489
0.02616226,
499490
]
500-
),
501-
]
491+
],
502492
},
503493
datetime.utcnow(),
504494
datetime.utcnow(),
505495
)
506-
],
496+
]
497+
provider.online_write_batch(
498+
config=store.config,
499+
table=document_embeddings_fv,
500+
data=data,
507501
progress=None,
508502
)
509503

510-
query = np.array(
504+
query_embedding = np.array(
511505
[
512506
0.17517076,
513507
-0.1259909,
@@ -523,7 +517,7 @@ def test_get_online_documents() -> None:
523517
)
524518
# Retrieve two features using two keys, one valid one non-existing
525519
result = store.retrieve_online_documents(
526-
feature="document_embeddings:Embeddings", query=query, top_k=3
520+
feature="document_embeddings:Embeddings", query=query_embedding, top_k=3
527521
).to_dict()
528522

529523
assert "Embeddings" in result

0 commit comments

Comments
 (0)