diff --git a/docs/reference/alpha-vector-database.md b/docs/reference/alpha-vector-database.md index 37d9b9cdf8..b9ce7f408a 100644 --- a/docs/reference/alpha-vector-database.md +++ b/docs/reference/alpha-vector-database.md @@ -13,7 +13,9 @@ Below are supported vector databases and implemented features: | Elasticsearch | [x] | [x] | | Milvus | [ ] | [ ] | | Faiss | [ ] | [ ] | +| SQLite | [x] | [ ] | +Note: SQLite support is in limited access and currently works only on Python 3.10. It will be updated as [sqlite_vec](https://github.com/asg017/sqlite-vec/) progresses. ## Example @@ -108,4 +110,20 @@ def print_online_features(features): print(key, " : ", value) print_online_features(features) +``` + +### Configuration +We offer two Online Store options for Vector Databases: PGVector and SQLite. + +#### Installation with SQLite +If you are using `pyenv` to manage your Python versions, you can install a Python build that supports loadable SQLite extensions with the following command: +```bash +PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \ + LDFLAGS="-L/opt/homebrew/opt/sqlite/lib" \ + CPPFLAGS="-I/opt/homebrew/opt/sqlite/include" \ + pyenv install 3.10.14 +``` +And you can install the Feast package via: +```bash +pip install feast[sqlite_vec] ``` \ No newline at end of file diff --git a/infra/scripts/pixi/pixi.lock b/infra/scripts/pixi/pixi.lock index 19a32f32ae..f1ce2d2658 100644 --- a/infra/scripts/pixi/pixi.lock +++ b/infra/scripts/pixi/pixi.lock @@ -1,4 +1,4 @@ -version: 4 +version: 5 environments: default: channels: @@ -11,6 +11,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda
py310: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -38,6 +41,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.10.14-h2469fbe_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 py311: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -66,6 +84,22 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + 
osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.2-hebf3989_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.11.9-h932a869_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 py39: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -93,6 +127,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.9.19-hd7ebdb9_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 packages: - kind: conda name: _libgcc_mutex @@ -123,6 +172,19 @@ packages: license_family: BSD size: 23621 timestamp: 1650670423406 +- kind: conda + name: bzip2 + version: 1.0.8 + build: h93a5062_5 + build_number: 5 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + sha256: bfa84296a638bea78a8bb29abc493ee95f2a0218775642474a840411b950fe5f + md5: 1bbc659ca658bfd49a481b5ef7a0f40f + license: bzip2-1.0.6 + license_family: BSD + size: 122325 + timestamp: 1699280294368 - kind: conda name: bzip2 version: 1.0.8 @@ -149,6 +211,17 @@ packages: license: ISC size: 155432 timestamp: 1706843687645 +- kind: conda + name: ca-certificates + version: 2024.2.2 + build: hf0a4a13_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + sha256: 49bc3439816ac72d0c0e0f144b8cc870fdcc4adec2e861407ec818d8116b2204 + 
md5: fb416a1795f18dcc5a038bc2dc54edf9 + license: ISC + size: 155725 + timestamp: 1706844034242 - kind: conda name: ld_impl_linux-64 version: '2.40' @@ -177,6 +250,20 @@ packages: license_family: GPL size: 713322 timestamp: 1713651222435 +- kind: conda + name: libcxx + version: 17.0.6 + build: h5f092b4_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + sha256: 119d3d9306f537d4c89dc99ed99b94c396d262f0b06f7833243646f68884f2c2 + md5: a96fd5dda8ce56c86a971e0fa02751d0 + depends: + - __osx >=11.0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 1248885 + timestamp: 1715020154867 - kind: conda name: libexpat version: 2.6.2 @@ -193,6 +280,33 @@ packages: license_family: MIT size: 73730 timestamp: 1710362120304 +- kind: conda + name: libexpat + version: 2.6.2 + build: hebf3989_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.2-hebf3989_0.conda + sha256: ba7173ac30064ea901a4c9fb5a51846dcc25512ceb565759be7d18cbf3e5415e + md5: e3cde7cfa87f82f7cb13d482d5e0ad09 + constrains: + - expat 2.6.2.* + license: MIT + license_family: MIT + size: 63655 + timestamp: 1710362424980 +- kind: conda + name: libffi + version: 3.4.2 + build: h3422bc3_5 + build_number: 5 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + sha256: 41b3d13efb775e340e4dba549ab5c029611ea6918703096b2eaa9c015c0750ca + md5: 086914b672be056eb70fd4285b6783b6 + license: MIT + license_family: MIT + size: 39020 + timestamp: 1636488587153 - kind: conda name: libffi version: 3.4.2 @@ -288,6 +402,19 @@ packages: license_family: GPL size: 33408 timestamp: 1697359010159 +- kind: conda + name: libsqlite + version: 3.45.3 + build: h091b4b1_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + sha256: 4337f466eb55bbdc74e168b52ec8c38f598e3664244ec7a2536009036e2066cc + md5: 
c8c1186c7f3351f6ffddb97b1f54fc58 + depends: + - libzlib >=1.2.13,<2.0.0a0 + license: Unlicense + size: 824794 + timestamp: 1713367748819 - kind: conda name: libsqlite version: 3.45.3 @@ -360,6 +487,23 @@ packages: license_family: Other size: 61588 timestamp: 1686575217516 +- kind: conda + name: libzlib + version: 1.2.13 + build: hfb2fe0b_6 + build_number: 6 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + sha256: 8b29a2386d99b8f58178951dcf19117b532cd9c4aa07623bf1667eae99755d32 + md5: 9c4e121cd926cab631bd1c4a61d18b17 + depends: + - __osx >=11.0 + constrains: + - zlib 1.2.13 *_6 + license: Zlib + license_family: Other + size: 46768 + timestamp: 1716874151980 - kind: conda name: ncurses version: 6.4.20240210 @@ -373,6 +517,17 @@ packages: license: X11 AND BSD-3-Clause size: 895669 timestamp: 1710866638986 +- kind: conda + name: ncurses + version: '6.5' + build: hb89a1cb_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + sha256: 87d7cf716d9d930dab682cb57b3b8d3a61940b47d6703f3529a155c938a6990a + md5: b13ad5724ac9ae98b6b4fd87e4500ba4 + license: X11 AND BSD-3-Clause + size: 795131 + timestamp: 1715194898402 - kind: conda name: openssl version: 3.2.1 @@ -408,6 +563,24 @@ packages: license_family: Apache size: 2895187 timestamp: 1714466138265 +- kind: conda + name: openssl + version: 3.3.0 + build: hfb2fe0b_3 + build_number: 3 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + sha256: 6f41c163ab57e7499dff092be4498614651f0f6432e12c2b9f06859a8bc39b75 + md5: 730f618b008b3c13c1e3f973408ddd67 + depends: + - __osx >=11.0 + - ca-certificates + constrains: + - pyopenssl >=22.1 + license: Apache-2.0 + license_family: Apache + size: 2893954 + timestamp: 1716468329572 - kind: conda name: python version: 3.9.19 @@ -437,6 +610,54 @@ packages: license: Python-2.0 size: 23800555 timestamp: 1710940120866 +- kind: 
conda + name: python + version: 3.9.19 + build: hd7ebdb9_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.9.19-hd7ebdb9_0_cpython.conda + sha256: 3b93f7a405f334043758dfa8aaca050429a954a37721a6462ebd20e94ef7c5a0 + md5: 45c4d173b12154f746be3b49b1190634 + depends: + - bzip2 >=1.0.8,<2.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.2,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.9.* *_cp39 + license: Python-2.0 + size: 11847835 + timestamp: 1710939779164 +- kind: conda + name: python + version: 3.10.14 + build: h2469fbe_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.10.14-h2469fbe_0_cpython.conda + sha256: 454d609fe25daedce9e886efcbfcadad103ed0362e7cb6d2bcddec90b1ecd3ee + md5: 4ae999c8227c6d8c7623d32d51d25ea9 + depends: + - bzip2 >=1.0.8,<2.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.2,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.10.* *_cp310 + license: Python-2.0 + size: 12336005 + timestamp: 1710939659384 - kind: conda name: python version: 3.10.14 @@ -466,6 +687,32 @@ packages: license: Python-2.0 size: 25517742 timestamp: 1710939725109 +- kind: conda + name: python + version: 3.11.9 + build: h932a869_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.11.9-h932a869_0_cpython.conda + sha256: a436ceabde1f056a0ac3e347dadc780ee2a135a421ddb6e9a469370769829e3c + md5: 293e0713ae804b5527a673e7605c04fc + depends: + - __osx >=11.0 + - bzip2 >=1.0.8,<2.0a0 + - libexpat >=2.6.2,<3.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.3,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - 
openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.11.* *_cp311 + license: Python-2.0 + size: 14644189 + timestamp: 1713552154779 - kind: conda name: python version: 3.11.9 @@ -512,6 +759,36 @@ packages: license_family: GPL size: 281456 timestamp: 1679532220005 +- kind: conda + name: readline + version: '8.2' + build: h92ec313_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + sha256: a1dfa679ac3f6007362386576a704ad2d0d7a02e98f5d0b115f207a2da63e884 + md5: 8cbb776a2f641b943d413b3e19df71f4 + depends: + - ncurses >=6.3,<7.0a0 + license: GPL-3.0-only + license_family: GPL + size: 250351 + timestamp: 1679532511311 +- kind: conda + name: tk + version: 8.6.13 + build: h5083fa2_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + sha256: 72457ad031b4c048e5891f3f6cb27a53cb479db68a52d965f796910e71a403a8 + md5: b50a57ba89c32b62428b71a875291c9b + depends: + - libzlib >=1.2.13,<1.3.0a0 + license: TCL + license_family: BSD + size: 3145523 + timestamp: 1699202432999 - kind: conda name: tk version: 8.6.13 @@ -554,6 +831,22 @@ packages: license: Apache-2.0 OR MIT size: 11891252 timestamp: 1714233659570 +- kind: conda + name: uv + version: 0.1.45 + build: hc069d6b_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + sha256: 80dfc19f2ef473e86e718361847d1d598e95ffd0c0f5de7d07cda35d25f6aef5 + md5: 9192238a60bc6da9c41092990c31eb41 + depends: + - __osx >=11.0 + - libcxx >=16 + constrains: + - __osx >=11.0 + license: Apache-2.0 OR MIT + size: 9231858 + timestamp: 1716265232676 - kind: conda name: xz version: 5.2.6 @@ -567,3 +860,14 @@ packages: license: LGPL-2.1 and GPL-2.0 size: 418368 timestamp: 1660346797927 +- kind: conda + name: xz + version: 5.2.6 + build: h57fd34a_0 + subdir: osx-arm64 + url: 
https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 + sha256: 59d78af0c3e071021cfe82dc40134c19dab8cdf804324b62940f5c8cd71803ec + md5: 39c6b54e94014701dd157f4f576ed211 + license: LGPL-2.1 and GPL-2.0 + size: 235693 + timestamp: 1660346961024 diff --git a/infra/scripts/pixi/pixi.toml b/infra/scripts/pixi/pixi.toml index f0d360fff3..10179339f7 100644 --- a/infra/scripts/pixi/pixi.toml +++ b/infra/scripts/pixi/pixi.toml @@ -1,7 +1,7 @@ [project] name = "pixi-feast" channels = ["conda-forge"] -platforms = ["linux-64"] +platforms = ["linux-64", "osx-arm64"] [tasks] diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 97b7c5456f..577bd3fe52 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1934,18 +1934,29 @@ def _retrieve_online_documents( "Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents." ) ( - requested_feature_views, + available_feature_views, _, ) = self._get_feature_views_to_use( features=[feature], allow_cache=True, hide_dummy_entity=False ) + requested_feature_view_name = ( + feature.split(":")[0] if isinstance(feature, str) else feature + ) + requested_feature_view = None  # stays None when no available view matches, so the check below raises ValueError instead of UnboundLocalError + for feature_view in available_feature_views: + if feature_view.name == requested_feature_view_name: + requested_feature_view = feature_view + if not requested_feature_view: + raise ValueError( + f"Feature view {requested_feature_view_name} not found in the registry."
+ ) requested_feature = ( feature.split(":")[1] if isinstance(feature, str) else feature ) provider = self._get_provider() document_features = self._retrieve_from_online_store( provider, - requested_feature_views[0], + requested_feature_view, requested_feature, query, top_k, diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 63d3ef03f5..41af14aaf1 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -14,10 +14,14 @@ import itertools import os import sqlite3 +import struct +import sys from datetime import datetime from pathlib import Path -from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union +import sqlite_vec +from google.protobuf.internal.containers import RepeatedScalarFieldContainer from pydantic import StrictStr from feast import Entity @@ -29,6 +33,7 @@ from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import FloatList as FloatListProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.utils import to_naive_utc @@ -45,6 +50,12 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel): path: StrictStr = "data/online.db" """ (optional) Path to sqlite db """ + vec_enabled: Optional[bool] = False + """ (optional) Enable or disable sqlite-vss for vector search""" + + vector_len: Optional[int] = 512 + """ (optional) Length of the vector to be stored in the database""" + class SqliteOnlineStore(OnlineStore): """ @@ -73,6 +84,10 @@ def _get_conn(self, config: RepoConfig): if not self._conn: db_path = 
self._get_db_path(config) self._conn = _initialize_conn(db_path) + if sys.version_info[0:2] == (3, 10): + self._conn.enable_load_extension(True) # type: ignore + sqlite_vec.load(self._conn) + return self._conn def online_write_batch( @@ -80,7 +95,12 @@ def online_write_batch( config: RepoConfig, table: FeatureView, data: List[ - Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + Tuple[ + EntityKeyProto, + Dict[str, ValueProto], + datetime, + Optional[datetime], + ] ], progress: Optional[Callable[[int], Any]], ) -> None: @@ -98,36 +118,74 @@ def online_write_batch( if created_ts is not None: created_ts = to_naive_utc(created_ts) + table_name = _table_id(project, table) for feature_name, val in values.items(): - conn.execute( - f""" - UPDATE {_table_id(project, table)} - SET value = ?, event_ts = ?, created_ts = ? - WHERE (entity_key = ? AND feature_name = ?) - """, - ( - # SET - val.SerializeToString(), - timestamp, - created_ts, - # WHERE - entity_key_bin, - feature_name, - ), - ) - - conn.execute( - f"""INSERT OR IGNORE INTO {_table_id(project, table)} - (entity_key, feature_name, value, event_ts, created_ts) - VALUES (?, ?, ?, ?, ?)""", - ( - entity_key_bin, - feature_name, - val.SerializeToString(), - timestamp, - created_ts, - ), - ) + if config.online_store.vec_enabled: + vector_bin = serialize_f32( + val.float_list_val.val, config.online_store.vector_len + ) # type: ignore + conn.execute( + f""" + UPDATE {table_name} + SET value = ?, vector_value = ?, event_ts = ?, created_ts = ? + WHERE (entity_key = ? AND feature_name = ?) 
+ """, + ( + # SET + val.SerializeToString(), + vector_bin, + timestamp, + created_ts, + # WHERE + entity_key_bin, + feature_name, + ), + ) + + conn.execute( + f"""INSERT OR IGNORE INTO {table_name} + (entity_key, feature_name, value, vector_value, event_ts, created_ts) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + entity_key_bin, + feature_name, + val.SerializeToString(), + vector_bin, + timestamp, + created_ts, + ), + ) + + else: + conn.execute( + f""" + UPDATE {table_name} + SET value = ?, event_ts = ?, created_ts = ? + WHERE (entity_key = ? AND feature_name = ?) + """, + ( + # SET + val.SerializeToString(), + timestamp, + created_ts, + # WHERE + entity_key_bin, + feature_name, + ), + ) + + conn.execute( + f"""INSERT OR IGNORE INTO {table_name} + (entity_key, feature_name, value, event_ts, created_ts) + VALUES (?, ?, ?, ?, ?)""", + ( + entity_key_bin, + feature_name, + val.SerializeToString(), + timestamp, + created_ts, + ), + ) if progress: progress(1) @@ -195,7 +253,7 @@ def update( for table in tables_to_keep: conn.execute( - f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" + f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" ) conn.execute( f"CREATE INDEX IF NOT EXISTS {_table_id(project, table)}_ek ON {_table_id(project, table)} (entity_key);" @@ -232,6 +290,124 @@ def teardown( except FileNotFoundError: pass + def retrieve_online_documents( + self, + config: RepoConfig, + table: FeatureView, + requested_feature: str, + embedding: List[float], + top_k: int, + distance_metric: Optional[str] = None, + ) -> List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ]: + """ + + Args: + config: Feast configuration object + 
table: FeatureView object as the table to search + requested_feature: The requested feature as the column to search + embedding: The query embedding to search for + top_k: The number of items to return + Returns: + List of tuples containing the event timestamp, the document feature, the vector value, and the distance + """ + project = config.project + + if not config.online_store.vec_enabled: + raise ValueError("sqlite-vss is not enabled in the online store config") + + conn = self._get_conn(config) + cur = conn.cursor() + + # Convert the embedding to a binary format instead of using SerializeToString() + query_embedding_bin = serialize_f32(embedding, config.online_store.vector_len) + table_name = _table_id(project, table) + + cur.execute( + f""" + CREATE VIRTUAL TABLE vec_example using vec0( + vector_value float[{config.online_store.vector_len}] + ); + """ + ) + + # Currently I can only insert the embedding value without crashing SQLite, will report a bug + cur.execute( + f""" + INSERT INTO vec_example(rowid, vector_value) + select rowid, vector_value from {table_name} + """ + ) + cur.execute( + """ + INSERT INTO vec_example(rowid, vector_value) + VALUES (?, ?) + """, + (0, query_embedding_bin), + ) + + # Have to join this with the {table_name} to get the feature name and entity_key + # Also the `top_k` doesn't appear to be working for some reason + cur.execute( + f""" + select + fv.entity_key, + f.vector_value, + fv.value, + f.distance, + fv.event_ts + from ( + select + rowid, + vector_value, + distance + from vec_example + where vector_value match ? + order by distance + limit ? 
+ ) f + left join {table_name} fv + on f.rowid = fv.rowid + """, + (query_embedding_bin, top_k), + ) + + rows = cur.fetchall() + + result: List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ] = [] + + for entity_key, _, string_value, distance, event_ts in rows: + feature_value_proto = ValueProto() + feature_value_proto.ParseFromString(string_value if string_value else b"") + vector_value_proto = ValueProto( + float_list_val=FloatListProto(val=embedding) + ) + distance_value_proto = ValueProto(float_val=distance) + + result.append( + ( + event_ts, + feature_value_proto, + vector_value_proto, + distance_value_proto, + ) + ) + + return result + def _initialize_conn(db_path: str): Path(db_path).parent.mkdir(exist_ok=True) @@ -246,6 +422,19 @@ def _table_id(project: str, table: FeatureView) -> str: return f"{project}_{table.name}" +def serialize_f32( + vector: Union[RepeatedScalarFieldContainer[float], List[float]], vector_length: int +) -> bytes: + """serializes a list of floats into a compact "raw bytes" format""" + return struct.pack(f"{vector_length}f", *vector) + + +def deserialize_f32(byte_vector: bytes, vector_length: int) -> List[float]: + """deserializes a list of floats from a compact "raw bytes" format""" + num_floats = len(byte_vector) // 4 # 4 bytes per float32; count is derived from the buffer because vector_length is a float count (as in serialize_f32), not a byte count + return list(struct.unpack(f"{num_floats}f", byte_vector)) + + class SqliteTable(InfraObject): """ A Sqlite table managed by Feast.
@@ -292,8 +481,11 @@ def from_proto(sqlite_table_proto: SqliteTableProto) -> Any: ) def update(self): + if sys.version_info[0:2] == (3, 10): + self.conn.enable_load_extension(True) + sqlite_vec.load(self.conn) self.conn.execute( - f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" + f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" ) self.conn.execute( f"CREATE INDEX IF NOT EXISTS {self.name}_ek ON {self.name} (entity_key);" diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index fd5d4631e5..d0da39aef4 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -53,7 +53,7 @@ azure-core==1.30.1 # azure-storage-blob azure-identity==1.16.0 # via feast (setup.py) -azure-storage-blob==12.19.1 +azure-storage-blob==12.20.0 # via feast (setup.py) babel==2.15.0 # via @@ -124,7 +124,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via @@ -161,17 +161,17 @@ distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 +docker==7.1.0 # via # feast (setup.py) # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework elastic-transport==8.13.1 # via elasticsearch @@ -230,7 +230,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -279,11 +279,11 @@ googleapis-common-protos[grpc]==1.63.0 # 
google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.13 +great-expectations==0.18.15 # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via # feast (setup.py) # google-api-core @@ -313,7 +313,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hazelcast-python-client==5.3.0 +hazelcast-python-client==5.4.0 # via feast (setup.py) hiredis==2.3.2 # via feast (setup.py) @@ -355,12 +355,12 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.24.0 +ipython==8.25.0 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -402,7 +402,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -420,7 +420,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # jupyter-lsp # jupyterlab @@ -429,15 +429,15 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -506,9 +506,9 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -536,7 +536,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -594,7 +593,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.20.0 # via 
jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # via ipython proto-plus==1.23.0 # via @@ -822,19 +821,18 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.3 +ruff==0.4.6 # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -857,7 +855,7 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -887,11 +885,13 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # via ibis-framework +sqlite-vec==0.0.1a10 + # via feast (setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 # via feast (setup.py) @@ -918,7 +918,7 @@ tomli==2.0.1 # pip-tools # pytest # pytest-env -tomlkit==0.12.4 +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -981,7 +981,7 @@ types-redis==4.6.0.20240425 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.5.0.20240423 +types-setuptools==70.0.0.20240524 # via # feast (setup.py) # types-cffi @@ -1064,7 +1064,7 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 # via diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 72124636b6..23bd94feb5 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -166,6 +166,8 @@ sniffio==1.3.1 # httpx sqlalchemy[mypy]==2.0.30 # via feast (setup.py) +sqlite-vec==0.0.1a10 + # via feast 
(setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index bd0647a3fe..643e3715c6 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -36,10 +36,6 @@ asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab -async-timeout==4.0.3 - # via - # aiohttp - # redis atpublic==4.1.0 # via ibis-framework attrs==23.2.0 @@ -53,7 +49,7 @@ azure-core==1.30.1 # azure-storage-blob azure-identity==1.16.0 # via feast (setup.py) -azure-storage-blob==12.19.1 +azure-storage-blob==12.20.0 # via feast (setup.py) babel==2.15.0 # via @@ -124,7 +120,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via @@ -153,7 +149,7 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.17.3 +deltalake==0.17.4 # via feast (setup.py) dill==0.3.8 # via feast (setup.py) @@ -161,17 +157,17 @@ distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 +docker==7.1.0 # via # feast (setup.py) # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework elastic-transport==8.13.1 # via elasticsearch @@ -181,11 +177,6 @@ email-validator==2.1.1 # via fastapi entrypoints==0.4 # via altair -exceptiongroup==1.2.1 - # via - # anyio - # ipython - # pytest execnet==2.1.1 # via pytest-xdist executing==2.0.1 @@ -230,7 +221,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -279,11 +270,11 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status 
-great-expectations==0.18.13 +great-expectations==0.18.15 # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via # feast (setup.py) # google-api-core @@ -313,7 +304,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hazelcast-python-client==5.3.0 +hazelcast-python-client==5.4.0 # via feast (setup.py) hiredis==2.3.2 # via feast (setup.py) @@ -355,12 +346,12 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.24.0 +ipython==8.25.0 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -402,7 +393,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -420,7 +411,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # jupyter-lsp # jupyterlab @@ -429,15 +420,15 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -506,9 +497,9 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -536,7 +527,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -594,7 +584,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # 
via ipython proto-plus==1.23.0 # via @@ -775,7 +765,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.28 +regex==2024.5.15 # via feast (setup.py) requests==2.31.0 # via @@ -822,19 +812,18 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.3 +ruff==0.4.6 # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -857,7 +846,7 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -887,11 +876,13 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # via ibis-framework +sqlite-vec==0.0.1a10 + # via feast (setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 # via feast (setup.py) @@ -909,16 +900,7 @@ tinycss2==1.3.0 # via nbconvert toml==0.10.2 # via feast (setup.py) -tomli==2.0.1 - # via - # build - # coverage - # jupyterlab - # mypy - # pip-tools - # pytest - # pytest-env -tomlkit==0.12.4 +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -981,7 +963,7 @@ types-redis==4.6.0.20240425 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.5.0.20240423 +types-setuptools==70.0.0.20240524 # via # feast (setup.py) # types-cffi @@ -991,8 +973,6 @@ types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 # via - # anyio - # async-lru # azure-core # azure-storage-blob # fastapi @@ -1007,7 +987,6 @@ typing-extensions==4.11.0 # testcontainers # typeguard # typer - # uvicorn tzdata==2024.1 # via pandas tzlocal==5.2 @@ 
-1064,7 +1043,7 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 # via diff --git a/sdk/python/requirements/py3.11-requirements.txt b/sdk/python/requirements/py3.11-requirements.txt index a381a6262b..9698eea6df 100644 --- a/sdk/python/requirements/py3.11-requirements.txt +++ b/sdk/python/requirements/py3.11-requirements.txt @@ -40,8 +40,6 @@ dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via fastapi -exceptiongroup==1.2.1 - # via anyio fastapi==0.111.0 # via # feast (setup.py) @@ -166,6 +164,8 @@ sniffio==1.3.1 # httpx sqlalchemy[mypy]==2.0.30 # via feast (setup.py) +sqlite-vec==0.0.1a10 + # via feast (setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 @@ -174,8 +174,6 @@ tenacity==8.3.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) -tomli==2.0.1 - # via mypy toolz==0.12.1 # via # dask @@ -190,7 +188,6 @@ types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via - # anyio # fastapi # mypy # pydantic @@ -198,7 +195,6 @@ typing-extensions==4.11.0 # sqlalchemy # typeguard # typer - # uvicorn tzdata==2024.1 # via pandas ujson==5.9.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 2456c85248..8aca700696 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -53,7 +53,7 @@ azure-core==1.30.1 # azure-storage-blob azure-identity==1.16.0 # via feast (setup.py) -azure-storage-blob==12.19.1 +azure-storage-blob==12.20.0 # via feast (setup.py) babel==2.15.0 # via @@ -124,7 +124,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via @@ -153,7 +153,7 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.17.3 +deltalake==0.17.4 # via feast (setup.py) dill==0.3.8 # via feast 
(setup.py) @@ -161,17 +161,17 @@ distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 +docker==7.1.0 # via # feast (setup.py) # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework elastic-transport==8.13.1 # via elasticsearch @@ -230,7 +230,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -279,11 +279,11 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.13 +great-expectations==0.18.15 # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via # feast (setup.py) # google-api-core @@ -313,7 +313,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hazelcast-python-client==5.3.0 +hazelcast-python-client==5.4.0 # via feast (setup.py) hiredis==2.3.2 # via feast (setup.py) @@ -350,7 +350,16 @@ idna==3.7 imagesize==1.4.1 # via sphinx importlib-metadata==7.1.0 - # via dask + # via + # build + # dask + # jupyter-client + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # nbconvert + # sphinx + # typeguard iniconfig==2.0.0 # via pytest ipykernel==6.29.4 @@ -360,7 +369,7 @@ ipython==8.18.1 # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -402,7 +411,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -420,7 +429,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # 
jupyter-lsp # jupyterlab @@ -429,15 +438,15 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -506,9 +515,9 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -536,7 +545,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -594,7 +602,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # via ipython proto-plus==1.23.0 # via @@ -775,7 +783,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.28 +regex==2024.5.15 # via feast (setup.py) requests==2.31.0 # via @@ -822,19 +830,20 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.3 +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +ruff==0.4.6 # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -857,7 +866,7 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -887,11 +896,13 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # via ibis-framework +sqlite-vec==0.0.1a10 + # via feast 
(setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 # via feast (setup.py) @@ -918,7 +929,7 @@ tomli==2.0.1 # pip-tools # pytest # pytest-env -tomlkit==0.12.4 +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -965,7 +976,7 @@ types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.1.0.20240425 +types-pymysql==1.1.0.20240524 # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis @@ -981,7 +992,7 @@ types-redis==4.6.0.20240425 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.5.0.20240423 +types-setuptools==70.0.0.20240524 # via # feast (setup.py) # types-cffi @@ -991,6 +1002,7 @@ types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 # via + # aioitertools # anyio # async-lru # azure-core @@ -998,11 +1010,13 @@ typing-extensions==4.11.0 # fastapi # great-expectations # ibis-framework + # ipython # mypy # pydantic # pydantic-core # snowflake-connector-python # sqlalchemy + # starlette # testcontainers # typeguard # typer @@ -1031,6 +1045,7 @@ urllib3==1.26.18 # requests # responses # rockset + # snowflake-connector-python # testcontainers uvicorn[standard]==0.29.0 # via @@ -1063,7 +1078,7 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 # via diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 72f422bfac..579f39135e 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -69,7 +69,9 @@ idna==3.7 # httpx # requests importlib-metadata==7.1.0 - # via dask + # via + # dask + # typeguard jinja2==3.1.4 # via # feast (setup.py) @@ -166,6 +168,8 @@ sniffio==1.3.1 # httpx sqlalchemy[mypy]==2.0.30 # via feast (setup.py) 
+sqlite-vec==0.0.1a10 + # via feast (setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 @@ -196,6 +200,7 @@ typing-extensions==4.11.0 # pydantic # pydantic-core # sqlalchemy + # starlette # typeguard # typer # uvicorn diff --git a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index fbf1fbb9b0..20a8ad7bd8 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -4,7 +4,7 @@ from feast import Entity, FeatureService, FeatureView, Field, FileSource, PushSource from feast.on_demand_feature_view import on_demand_feature_view -from feast.types import Float32, Int64, String +from feast.types import Array, Float32, Int64, String # Note that file source paths are not validated, so there doesn't actually need to be any data # at the paths for these file sources. Since these paths are effectively fake, this example @@ -32,6 +32,12 @@ batch_source=driver_locations_source, ) +rag_documents_source = FileSource( + name="rag_documents_source", + path="data/rag_documents.parquet", + timestamp_field="event_timestamp", +) + driver = Entity( name="driver", # The name is derived from this argument, not object name. join_keys=["driver_id"], @@ -43,6 +49,10 @@ join_keys=["customer_id"], ) +item = Entity( + name="item_id", # The name is derived from this argument, not object name. 
+ join_keys=["item_id"], +) driver_locations = FeatureView( name="driver_locations", @@ -101,6 +111,17 @@ tags={}, ) +document_embeddings = FeatureView( + name="document_embeddings", + entities=[item], + schema=[ + Field(name="Embeddings", dtype=Array(Float32)), + Field(name="item_id", dtype=String), + ], + source=rag_documents_source, + ttl=timedelta(hours=24), +) + @on_demand_feature_view( sources=[customer_profile], diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index c16b26fee6..fc90108d78 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -74,13 +74,13 @@ def test_universal_cli(): cwd=repo_path, ) assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_feature_views()).is_length(4) + assertpy.assert_that(fs.list_feature_views()).is_length(5) result = runner.run( ["data-sources", "describe", "customer_profile_source"], cwd=repo_path, ) assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_data_sources()).is_length(4) + assertpy.assert_that(fs.list_data_sources()).is_length(5) # entity & feature view describe commands should fail when objects don't exist result = runner.run(["entities", "describe", "foo"], cwd=repo_path) diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 5368b1e11c..13b220fbb9 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -1,20 +1,26 @@ import os +import platform +import sqlite3 +import sys import time from datetime import datetime +import numpy as np import pandas as pd import pytest +import sqlite_vec from pandas.testing import assert_frame_equal from feast import FeatureStore, RepoConfig from feast.errors 
import FeatureViewNotFoundException from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import FloatList as FloatListProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RegistryConfig from tests.utils.cli_repo_creator import CliRunner, get_example_repo -def test_online() -> None: +def test_get_online_features() -> None: """ Test reading from the online store in local mode. """ @@ -415,3 +421,140 @@ def test_online_to_df(): ] expected_df = pd.DataFrame({k: reversed(v) for (k, v) in df_dict.items()}) assert_frame_equal(result_df[ordered_column], expected_df) + + +@pytest.mark.skipif( + sys.version_info[0:2] != (3, 10) or platform.system() != "Darwin", + reason="Only works on Python 3.10 and MacOS", +) +def test_sqlite_get_online_documents() -> None: + """ + Test retrieving documents from the online store in local mode. + """ + n = 10 # number of samples - note: we'll actually double it + vector_length = 8 + runner = CliRunner() + with runner.local_repo( + get_example_repo("example_feature_repo_1.py"), "file" + ) as store: + store.config.online_store.vec_enabled = True + store.config.online_store.vector_len = vector_length + # Write some data to two tables + document_embeddings_fv = store.get_feature_view(name="document_embeddings") + + provider = store._get_provider() + + item_keys = [ + EntityKeyProto( + join_keys=["item_id"], entity_values=[ValueProto(int64_val=i)] + ) + for i in range(n) + ] + data = [] + for item_key in item_keys: + data.append( + ( + item_key, + { + "Embeddings": ValueProto( + float_list_val=FloatListProto( + val=np.random.random( + vector_length, + ) + ) + ) + }, + datetime.utcnow(), + datetime.utcnow(), + ) + ) + + provider.online_write_batch( + config=store.config, + table=document_embeddings_fv, + data=data, + progress=None, + ) + documents_df = pd.DataFrame( + { + "item_id": [str(i) for i in range(n)], + "Embeddings": [ + 
np.random.random( + vector_length, + ) + for i in range(n) + ], + "event_timestamp": [datetime.utcnow() for _ in range(n)], + } + ) + + store.write_to_online_store( + feature_view_name="document_embeddings", + df=documents_df, + ) + + document_table = store._provider._online_store._conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' and name like '%_document_embeddings';" + ).fetchall() + assert len(document_table) == 1 + document_table_name = document_table[0][0] + record_count = len( + store._provider._online_store._conn.execute( + f"select * from {document_table_name}" + ).fetchall() + ) + assert record_count == len(data) + documents_df.shape[0] + + query_embedding = np.random.random( + vector_length, + ) + result = store.retrieve_online_documents( + feature="document_embeddings:Embeddings", query=query_embedding, top_k=3 + ).to_dict() + + assert "Embeddings" in result + assert "distance" in result + assert len(result["distance"]) == 3 + + +@pytest.mark.skipif( + sys.version_info[0:2] != (3, 10) or platform.system() != "Darwin", + reason="Only works on Python 3.10 and MacOS", +) +def test_sqlite_vec_import() -> None: + db = sqlite3.connect(":memory:") + db.enable_load_extension(True) + sqlite_vec.load(db) + + db.execute(""" + create virtual table vec_examples using vec0( + sample_embedding float[8] + ); + """) + + db.execute(""" + insert into vec_examples(rowid, sample_embedding) + values + (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'), + (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'), + (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'), + (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]'); + """) + + sqlite_version, vec_version = db.execute( + "select sqlite_version(), vec_version()" + ).fetchone() + assert vec_version == "v0.0.1-alpha.10" + print(f"sqlite_version={sqlite_version}, vec_version={vec_version}") + + result = db.execute(""" + select + rowid, + distance + 
from vec_examples + where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]' + order by distance + limit 2; + """).fetchall() + result = [(rowid, round(distance, 2)) for rowid, distance in result] + assert result == [(2, 2.39), (1, 2.39)] diff --git a/setup.py b/setup.py index 6d849a83b4..9b3d0e55e6 100644 --- a/setup.py +++ b/setup.py @@ -96,6 +96,9 @@ "pyspark>=3.0.0,<4", ] +SQLITE_VEC_REQUIRED = [ + "sqlite-vec==v0.0.1-alpha.10", +] TRINO_REQUIRED = ["trino>=0.305.0,<0.400.0", "regex"] POSTGRES_REQUIRED = [ @@ -214,6 +217,7 @@ + DUCKDB_REQUIRED + DELTA_REQUIRED + ELASTICSEARCH_REQUIRED + + SQLITE_VEC_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED @@ -381,6 +385,7 @@ def run(self): "ikv": IKV_REQUIRED, "delta": DELTA_REQUIRED, "elasticsearch": ELASTICSEARCH_REQUIRED, + "sqlite_vec": SQLITE_VEC_REQUIRED, }, include_package_data=True, license="Apache",