Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make sure FeatureViews with same name can not be applied at the same … #1651

Merged
merged 5 commits into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def apply(
assert isinstance(objects, list)

views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)]
_validate_feature_views(views_to_update)
entities_to_update = [ob for ob in objects if isinstance(ob, Entity)]
services_to_update = [ob for ob in objects if isinstance(ob, FeatureService)]

Expand Down Expand Up @@ -814,3 +815,15 @@ def _print_materialization_log(
f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}"
f" into the {Style.BRIGHT + Fore.GREEN}{online_store}{Style.RESET_ALL} online store.\n"
)


def _validate_feature_views(feature_views: List[FeatureView]):
""" Verify feature views have unique names"""
name_to_fv_dict = {}
for fv in feature_views:
if fv.name in name_to_fv_dict:
raise ValueError(
f"More than one feature view with name {fv.name} found. Please ensure that all feature view names are unique. It may be necessary to ignore certain files in your feature repository by using a .feastignore file."
)
else:
name_to_fv_dict[fv.name] = fv
4 changes: 2 additions & 2 deletions sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from feast import Entity, FeatureTable
from feast.feature_service import FeatureService
from feast.feature_store import FeatureStore
from feast.feature_store import FeatureStore, _validate_feature_views
from feast.feature_view import FeatureView
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
Expand Down Expand Up @@ -103,7 +103,6 @@ def parse_repo(repo_root: Path) -> ParsedRepo:
for repo_file in get_repo_files(repo_root):
module_path = py_path_to_module(repo_file, repo_root)
module = importlib.import_module(module_path)

for attr_name in dir(module):
obj = getattr(module, attr_name)
if isinstance(obj, FeatureTable):
Expand Down Expand Up @@ -162,6 +161,7 @@ def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation
registry._initialize_registry()
sys.dont_write_bytecode = True
repo = parse_repo(repo_path)
_validate_feature_views(repo.feature_views)
data_sources = [t.batch_source for t in repo.feature_views]

if not skip_source_validation:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from google.protobuf.duration_pb2 import Duration

from feast import FeatureView, FileSource

driver_hourly_stats = FileSource(
path="driver_stats.parquet", # this parquet is not real and will not be read
)

driver_hourly_stats_view = FeatureView(
name="driver_hourly_stats", # Intentionally use the same FeatureView name
entities=["driver_id"],
online=False,
input=driver_hourly_stats,
ttl=Duration(seconds=10),
tags={},
)

driver_hourly_stats_view_dup1 = FeatureView(
name="driver_hourly_stats", # Intentionally use the same FeatureView name
entities=["driver_id"],
online=False,
input=driver_hourly_stats,
ttl=Duration(seconds=10),
tags={},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import tempfile
from pathlib import Path
from textwrap import dedent

from tests.utils.cli_utils import CliRunner, get_example_repo


def test_cli_apply_duplicated_featureview_names() -> None:
"""
Test apply feature views with duplicated names and single py file in a feature repo using CLI
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
repo_path = Path(repo_dir_name)
data_path = Path(data_dir_name)

repo_config = repo_path / "feature_store.yaml"

repo_config.write_text(
dedent(
f"""
project: foo
registry: {data_path / "registry.db"}
provider: local
online_store:
path: {data_path / "online_store.db"}
"""
)
)

repo_example = repo_path / "example.py"
repo_example.write_text(
get_example_repo(
"example_feature_repo_with_duplicated_featureview_names.py"
)
)
rc, output = runner.run_with_output(["apply"], cwd=repo_path)

assert (
rc != 0
and b"Please ensure that all feature view names are unique" in output
)


def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
"""
Test apply feature views with duplicated names from multiple py files in a feature repo using CLI
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
repo_path = Path(repo_dir_name)
data_path = Path(data_dir_name)

repo_config = repo_path / "feature_store.yaml"

repo_config.write_text(
dedent(
f"""
project: foo
registry: {data_path / "registry.db"}
provider: local
online_store:
path: {data_path / "online_store.db"}
"""
)
)
# Create multiple py files containing the same feature view name
for i in range(3):
repo_example = repo_path / f"example{i}.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
rc, output = runner.run_with_output(["apply"], cwd=repo_path)

assert (
rc != 0
and b"Please ensure that all feature view names are unique" in output
)
33 changes: 33 additions & 0 deletions sdk/python/tests/integration/registration/test_feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,3 +480,36 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source):
assert len(fv_stored.materialization_intervals) == 0

test_feature_store.teardown()


def test_apply_duplicated_featureview_names(feature_store_with_local_registry):
""" Test applying feature views with duplicated names"""

driver_stats = FeatureView(
name="driver_hourly_stats",
entities=["driver_id"],
ttl=timedelta(seconds=10),
online=False,
input=FileSource(path="driver_stats.parquet"),
tags={},
)

customer_stats = FeatureView(
name="driver_hourly_stats",
entities=["id"],
ttl=timedelta(seconds=10),
online=False,
input=FileSource(path="customer_stats.parquet"),
tags={},
)
try:
feature_store_with_local_registry.apply([driver_stats, customer_stats])
error = None
except ValueError as e:
error = e
assert (
isinstance(error, ValueError)
and "Please ensure that all feature view names are unique" in error.args[0]
)

feature_store_with_local_registry.teardown()