Skip to content

Commit 2895d94

Browse files
committed
Support entity fields in feature view schema parameter by dropping them
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
1 parent d1f76e5 commit 2895d94

File tree

3 files changed

+87
-8
lines changed

3 files changed

+87
-8
lines changed

sdk/python/feast/entity.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class Entity:
4040
owner: The owner of the entity, typically the email of the primary maintainer.
4141
created_timestamp: The time when the entity was created.
4242
last_updated_timestamp: The time when the entity was last updated.
43-
join_keys: A list of property that uniquely identifies different entities within the
43+
join_keys: A list of properties that uniquely identify different entities within the
4444
collection. This is meant to replace the `join_key` parameter, but currently only
4545
supports a list of size one.
4646
"""
@@ -67,7 +67,25 @@ def __init__(
6767
owner: str = "",
6868
join_keys: Optional[List[str]] = None,
6969
):
70-
"""Creates an Entity object."""
70+
"""
71+
Creates an Entity object.
72+
73+
Args:
74+
name: The unique name of the entity.
75+
value_type: The type of the entity, such as string or float.
76+
description: A human-readable description.
77+
join_key (deprecated): A property that uniquely identifies different entities within the
78+
collection. The join_key property is typically used for joining entities
79+
with their associated features. If not specified, defaults to the name.
80+
tags: A dictionary of key-value pairs to store arbitrary metadata.
81+
owner: The owner of the entity, typically the email of the primary maintainer.
82+
join_keys: A list of properties that uniquely identify different entities within the
83+
collection. This is meant to replace the `join_key` parameter, but currently only
84+
supports a list of size one.
85+
86+
Raises:
87+
ValueError: Parameters are specified incorrectly.
88+
"""
7189
if len(args) == 1:
7290
warnings.warn(
7391
(
@@ -88,6 +106,15 @@ def __init__(
88106

89107
self.value_type = value_type
90108

109+
if join_key:
110+
warnings.warn(
111+
(
112+
"The `join_key` parameter is being deprecated in favor of the `join_keys` parameter. "
113+
"Please switch from using `join_key` to `join_keys`. Feast 0.22 and onwards will not "
114+
"support the `join_key` parameter."
115+
),
116+
DeprecationWarning,
117+
)
91118
self.join_keys = join_keys or []
92119
if join_keys and len(join_keys) > 1:
93120
raise ValueError(

sdk/python/feast/inference.py

+5
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,13 @@ def update_feature_views_with_inferred_features(
150150
config: The config for the current feature store.
151151
"""
152152
entity_name_to_join_key_map = {entity.name: entity.join_key for entity in entities}
153+
join_keys = entity_name_to_join_key_map.values()
153154

154155
for fv in fvs:
156+
# First drop all Entity fields. Then infer features if necessary.
157+
fv.schema = [field for field in fv.schema if field.name not in join_keys]
158+
fv.features = [field for field in fv.features if field.name not in join_keys]
159+
155160
if not fv.features:
156161
columns_to_exclude = {
157162
fv.batch_source.timestamp_field,

sdk/python/tests/integration/registration/test_inference.py

+53-6
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@
2424
from feast.inference import (
2525
update_data_sources_with_inferred_event_timestamp_col,
2626
update_entities_with_inferred_types_from_feature_views,
27+
update_feature_views_with_inferred_features,
2728
)
2829
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
2930
SparkSource,
3031
)
3132
from feast.on_demand_feature_view import on_demand_feature_view
32-
from feast.types import PrimitiveFeastType, String, UnixTimestamp
33+
from feast.types import Float32, PrimitiveFeastType, String, UnixTimestamp
3334
from tests.utils.data_source_utils import (
3435
prep_file_source,
3536
simple_bq_source_using_query_arg,
@@ -168,15 +169,14 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so
168169
def test_on_demand_features_type_inference():
169170
# Create Feature Views
170171
date_request = RequestSource(
171-
name="date_request",
172-
schema=[Field(name="some_date", dtype=PrimitiveFeastType.UNIX_TIMESTAMP)],
172+
name="date_request", schema=[Field(name="some_date", dtype=UnixTimestamp)],
173173
)
174174

175175
@on_demand_feature_view(
176176
sources={"date_request": date_request},
177-
features=[
178-
Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP),
179-
Feature(name="string_output", dtype=ValueType.STRING),
177+
schema=[
178+
Field(name="output", dtype=UnixTimestamp),
179+
Field(name="string_output", dtype=String),
180180
],
181181
)
182182
def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
@@ -285,3 +285,50 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:
285285

286286
with pytest.raises(SpecifiedFeaturesNotPresentError):
287287
test_view_with_missing_feature.infer_features()
288+
289+
290+
def test_update_feature_views_with_inferred_features():
291+
file_source = FileSource(name="test", path="test path")
292+
entity1 = Entity(name="test1", join_key="test_column_1")
293+
entity2 = Entity(name="test2", join_key="test_column_2")
294+
feature_view_1 = FeatureView(
295+
name="test1",
296+
entities=[entity1],
297+
schema=[
298+
Field(name="feature", dtype=Float32),
299+
Field(name="test_column_1", dtype=String),
300+
],
301+
source=file_source,
302+
)
303+
feature_view_2 = FeatureView(
304+
name="test2",
305+
entities=[entity1, entity2],
306+
schema=[
307+
Field(name="feature", dtype=Float32),
308+
Field(name="test_column_1", dtype=String),
309+
Field(name="test_column_2", dtype=String),
310+
],
311+
source=file_source,
312+
)
313+
314+
assert len(feature_view_1.schema) == 2
315+
assert len(feature_view_1.features) == 2
316+
317+
# The entity field should be deleted from the schema and features of the feature view.
318+
update_feature_views_with_inferred_features(
319+
[feature_view_1], [entity1], RepoConfig(provider="local", project="test")
320+
)
321+
assert len(feature_view_1.schema) == 1
322+
assert len(feature_view_1.features) == 1
323+
324+
assert len(feature_view_2.schema) == 3
325+
assert len(feature_view_2.features) == 3
326+
327+
# The entity fields should be deleted from the schema and features of the feature view.
328+
update_feature_views_with_inferred_features(
329+
[feature_view_2],
330+
[entity1, entity2],
331+
RepoConfig(provider="local", project="test"),
332+
)
333+
assert len(feature_view_2.schema) == 1
334+
assert len(feature_view_2.features) == 1

0 commit comments

Comments
 (0)