diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 923c0b0335b..7db547f61bb 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -95,6 +95,10 @@ jobs: run: | docker pull vishnunair/docker-redis-cluster:latest docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster + - name: Setup Milvus Database + run: | + wget https://github.com/milvus-io/milvus/releases/download/v2.5.1/milvus-standalone-docker-compose.yml -O docker-compose.yml + docker compose up -d - name: Test python if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak env: diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 165677a843a..05df5587580 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -866,6 +866,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List "hazelcast", "ikv", "couchbase", + "milvus", ], case_sensitive=False, ), diff --git a/sdk/python/feast/infra/online_stores/milvus_online_store/milvus_repo_configuration.py b/sdk/python/feast/infra/online_stores/milvus_online_store/milvus_repo_configuration.py index 8e8402862cb..174c0b53737 100644 --- a/sdk/python/feast/infra/online_stores/milvus_online_store/milvus_repo_configuration.py +++ b/sdk/python/feast/infra/online_stores/milvus_online_store/milvus_repo_configuration.py @@ -1,6 +1,7 @@ from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, ) +from tests.integration.feature_repos.repo_configuration import MILVUS_CONFIG from tests.integration.feature_repos.universal.online_store.milvus import ( MilvusOnlineStoreCreator, ) @@ -10,3 +11,5 @@ online_store="milvus", online_store_creator=MilvusOnlineStoreCreator ), ] + +AVAILABLE_ONLINE_STORES = {"milvus": (MILVUS_CONFIG, MilvusOnlineStoreCreator)} 
diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index d943caa4c1a..47520f22bfe 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -61,6 +61,7 @@ "feast.infra.online_stores.contrib.elasticsearch.ElasticSearchOnlineStore": "feast.infra.online_stores.elasticsearch_online_store.ElasticSearchOnlineStore", "feast.infra.online_stores.contrib.singlestore_online_store.singlestore.SingleStoreOnlineStore": "feast.infra.online_stores.singlestore_online_store.singlestore.SingleStoreOnlineStore", "feast.infra.online_stores.contrib.qdrant.QdrantOnlineStore": "feast.infra.online_stores.cqdrant.QdrantOnlineStore", + "feast.infra.online_stores.contrib.milvus.MilvusOnlineStore": "feast.infra.online_stores.milvus.MilvusOnlineStore", } ONLINE_STORE_CLASS_FOR_TYPE = { diff --git a/sdk/python/feast/templates/milvus/.gitignore b/sdk/python/feast/templates/milvus/.gitignore new file mode 100644 index 00000000000..e86277f60f4 --- /dev/null +++ b/sdk/python/feast/templates/milvus/.gitignore @@ -0,0 +1,45 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +*.egg-info/ +dist/ +build/ +.venv + +# Pytest +.cache +*.cover +*.log +.coverage +nosetests.xml +coverage.xml +*.hypothesis/ +*.pytest_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IDEs and Editors +.vscode/ +.idea/ +*.swp +*.swo +*.sublime-workspace +*.sublime-project + +# OS generated files +.DS_Store +Thumbs.db diff --git a/sdk/python/feast/templates/milvus/README.md b/sdk/python/feast/templates/milvus/README.md new file mode 100644 index 00000000000..1e617cc442f --- /dev/null +++ b/sdk/python/feast/templates/milvus/README.md @@ -0,0 +1,29 @@ +# Feast Quickstart +If you haven't already, check out the quickstart guide on Feast's website (http://docs.feast.dev/quickstart), which +uses this repo. 
A quick view of what's in this repository's `feature_repo/` directory: + +* `data/` contains raw demo parquet data +* `feature_repo/example_repo.py` contains demo feature definitions +* `feature_repo/feature_store.yaml` contains a demo setup configuring where data sources are +* `feature_repo/test_workflow.py` showcases how to run all key Feast commands, including defining, retrieving, and pushing features. + +You can run the overall workflow with `python test_workflow.py`. + +## To move from this into a more production-ready workflow: +> See more details in [Running Feast in production](https://docs.feast.dev/how-to-guides/running-feast-in-production) + +1. First: you should start with a different Feast template, which delegates to a more scalable offline store. + - For example, running `feast init -t gcp` + or `feast init -t aws` or `feast init -t snowflake`. + - You can see your options if you run `feast init --help`. +2. `feature_store.yaml` points to a local file as a registry. You'll want to set up a remote file (e.g. in S3/GCS) or a +SQL registry. See [registry docs](https://docs.feast.dev/getting-started/concepts/registry) for more details. +3. This example uses a file [offline store](https://docs.feast.dev/getting-started/components/offline-store) + to generate training data. It does not scale. We recommend instead using a data warehouse such as BigQuery, + Snowflake, or Redshift. There is experimental support for Spark as well. +4. Set up CI/CD + dev vs staging vs prod environments to automatically update the registry as you change Feast feature definitions. See [docs](https://docs.feast.dev/how-to-guides/running-feast-in-production#1.-automatically-deploying-changes-to-your-feature-definitions). +5. (optional) Regularly scheduled materialization to power low-latency feature retrieval (e.g. via Airflow). See [Batch data ingestion](https://docs.feast.dev/getting-started/concepts/data-ingestion#batch-data-ingestion) +for more details. +6.
(optional) Deploy feature server instances with `feast serve` to expose endpoints to retrieve online features. + - See [Python feature server](https://docs.feast.dev/reference/feature-servers/python-feature-server) for details. + - Use cases can also directly call the Feast client to fetch features as per [Feature retrieval](https://docs.feast.dev/getting-started/concepts/feature-retrieval) \ No newline at end of file diff --git a/sdk/python/feast/templates/milvus/__init__.py b/sdk/python/feast/templates/milvus/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/templates/milvus/bootstrap.py b/sdk/python/feast/templates/milvus/bootstrap.py new file mode 100644 index 00000000000..9f6a5a6c969 --- /dev/null +++ b/sdk/python/feast/templates/milvus/bootstrap.py @@ -0,0 +1,37 @@ +from feast.file_utils import replace_str_in_file + + +def bootstrap(): + # Bootstrap() will automatically be called from the init_repo() during `feast init` + + import pathlib + from datetime import datetime, timedelta + + from feast.driver_test_data import create_driver_hourly_stats_df + + repo_path = pathlib.Path(__file__).parent.absolute() / "feature_repo" + project_name = pathlib.Path(__file__).parent.absolute().name + data_path = repo_path / "data" + data_path.mkdir(exist_ok=True) + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + + driver_stats_path = data_path / "driver_stats.parquet" + driver_df.to_parquet(path=str(driver_stats_path), allow_truncated_timestamps=True) + + example_py_file = repo_path / "example_repo.py" + replace_str_in_file(example_py_file, "%PROJECT_NAME%", str(project_name)) + replace_str_in_file( + example_py_file, "%PARQUET_PATH%", str(driver_stats_path.relative_to(repo_path)) + ) + replace_str_in_file( + example_py_file, 
"%LOGGING_PATH%", str(data_path.relative_to(repo_path)) + ) + + +if __name__ == "__main__": + bootstrap() diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 54129f23c6e..15e3a824265 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -78,6 +78,9 @@ from tests.integration.feature_repos.universal.online_store.dynamodb import ( DynamoDBOnlineStoreCreator, ) +from tests.integration.feature_repos.universal.online_store.milvus import ( + MilvusOnlineStoreCreator, +) from tests.integration.feature_repos.universal.online_store.redis import ( RedisOnlineStoreCreator, ) @@ -163,7 +166,7 @@ AVAILABLE_ONLINE_STORES["datastore"] = ("datastore", None) AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None) AVAILABLE_ONLINE_STORES["bigtable"] = (BIGTABLE_CONFIG, None) - # AVAILABLE_ONLINE_STORES["milvus"] = (MILVUS_CONFIG, None) + AVAILABLE_ONLINE_STORES["milvus"] = (MILVUS_CONFIG, None) # Uncomment to test using private IKV account. Currently not enabled as # there is no dedicated IKV instance for CI testing and there is no @@ -211,6 +214,7 @@ str, Tuple[Union[str, Dict[str, str]], Optional[Type[OnlineStoreCreator]]] ] = { "redis": (REDIS_CONFIG, RedisOnlineStoreCreator), + "milvus": (MILVUS_CONFIG, MilvusOnlineStoreCreator), "dynamodb": (DYNAMO_CONFIG, DynamoDBOnlineStoreCreator), "datastore": ("datastore", DatastoreOnlineStoreCreator), "bigtable": ("bigtable", BigtableOnlineStoreCreator), diff --git a/setup.py b/setup.py index 16240ff2722..0a282ea9d94 100644 --- a/setup.py +++ b/setup.py @@ -187,7 +187,7 @@ "pytest-mock==1.10.4", "pytest-env", "Sphinx>4.0.0,<7", - "testcontainers==4.8.2", + "testcontainers==4.9.0", "python-keycloak==4.2.2", "pre-commit<3.3.2", "assertpy==1.1",