Skip to content

Commit ebb7dfe

Browse files
fix: Fix __hash__ methods (#2556)
* Fix __hash__ method for Entity Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix __hash__ method for FeatureService Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Remove references to PrimitiveFeastType Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix __hash__ method for DataSource and PushSource Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix __hash__ method for FeatureView and OnDemandFeatureView Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix __hash__ method for SavedDataset Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix bugs Signed-off-by: Felix Wang <wangfelix98@gmail.com> * Fix push merge Signed-off-by: Danny Chiao <danny@tecton.ai> Co-authored-by: Danny Chiao <danny@tecton.ai>
1 parent 7076fe0 commit ebb7dfe

20 files changed

+394
-95
lines changed

sdk/python/feast/base_feature_view.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def __str__(self):
110110
return str(MessageToJson(self.to_proto()))
111111

112112
def __hash__(self):
113-
return hash((id(self), self.name))
113+
return hash((self.name))
114114

115115
def __getitem__(self, item):
116116
assert isinstance(item, list)
@@ -134,6 +134,7 @@ def __eq__(self, other):
134134
if (
135135
self.name != other.name
136136
or sorted(self.features) != sorted(other.features)
137+
or self.projection != other.projection
137138
or self.description != other.description
138139
or self.tags != other.tags
139140
or self.owner != other.owner

sdk/python/feast/data_source.py

+33-13
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def __init__(
245245
self.owner = owner or ""
246246

247247
def __hash__(self):
248-
return hash((id(self), self.name))
248+
return hash((self.name, self.timestamp_field))
249249

250250
def __str__(self):
251251
return str(MessageToJson(self.to_proto()))
@@ -263,9 +263,9 @@ def __eq__(self, other):
263263
or self.created_timestamp_column != other.created_timestamp_column
264264
or self.field_mapping != other.field_mapping
265265
or self.date_partition_column != other.date_partition_column
266+
or self.description != other.description
266267
or self.tags != other.tags
267268
or self.owner != other.owner
268-
or self.description != other.description
269269
):
270270
return False
271271

@@ -392,6 +392,9 @@ def __eq__(self, other):
392392
"Comparisons should only involve KafkaSource class objects."
393393
)
394394

395+
if not super().__eq__(other):
396+
return False
397+
395398
if (
396399
self.kafka_options.bootstrap_servers
397400
!= other.kafka_options.bootstrap_servers
@@ -402,6 +405,9 @@ def __eq__(self, other):
402405

403406
return True
404407

408+
def __hash__(self):
409+
return super().__hash__()
410+
405411
@staticmethod
406412
def from_proto(data_source: DataSourceProto):
407413
return KafkaSource(
@@ -507,13 +513,10 @@ def __eq__(self, other):
507513
raise TypeError(
508514
"Comparisons should only involve RequestSource class objects."
509515
)
510-
if (
511-
self.name != other.name
512-
or self.description != other.description
513-
or self.owner != other.owner
514-
or self.tags != other.tags
515-
):
516+
517+
if not super().__eq__(other):
516518
return False
519+
517520
if isinstance(self.schema, List) and isinstance(other.schema, List):
518521
for field1, field2 in zip(self.schema, other.schema):
519522
if field1 != field2:
@@ -671,24 +674,26 @@ def __init__(
671674
)
672675

673676
def __eq__(self, other):
674-
if other is None:
675-
return False
676-
677677
if not isinstance(other, KinesisSource):
678678
raise TypeError(
679679
"Comparisons should only involve KinesisSource class objects."
680680
)
681681

682+
if not super().__eq__(other):
683+
return False
684+
682685
if (
683-
self.name != other.name
684-
or self.kinesis_options.record_format != other.kinesis_options.record_format
686+
self.kinesis_options.record_format != other.kinesis_options.record_format
685687
or self.kinesis_options.region != other.kinesis_options.region
686688
or self.kinesis_options.stream_name != other.kinesis_options.stream_name
687689
):
688690
return False
689691

690692
return True
691693

694+
def __hash__(self):
695+
return super().__hash__()
696+
692697
def to_proto(self) -> DataSourceProto:
693698
data_source_proto = DataSourceProto(
694699
name=self.name,
@@ -744,6 +749,21 @@ def __init__(
744749
if not self.batch_source:
745750
raise ValueError(f"batch_source is needed for push source {self.name}")
746751

752+
def __eq__(self, other):
753+
if not isinstance(other, PushSource):
754+
raise TypeError("Comparisons should only involve PushSource class objects.")
755+
756+
if not super().__eq__(other):
757+
return False
758+
759+
if self.batch_source != other.batch_source:
760+
return False
761+
762+
return True
763+
764+
def __hash__(self):
765+
return super().__hash__()
766+
747767
def validate(self, config: RepoConfig):
748768
pass
749769

sdk/python/feast/diff/registry_diff.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def extract_objects_for_keep_delete_update_add(
177177
FeastObjectType, List[Any]
178178
] = FeastObjectType.get_objects_from_registry(registry, current_project)
179179
registry_object_type_to_repo_contents: Dict[
180-
FeastObjectType, Set[Any]
180+
FeastObjectType, List[Any]
181181
] = FeastObjectType.get_objects_from_repo_contents(desired_repo_contents)
182182

183183
for object_type in FEAST_OBJECT_TYPES:

sdk/python/feast/entity.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def __init__(
132132
self.last_updated_timestamp = None
133133

134134
def __hash__(self) -> int:
135-
return hash((id(self), self.name))
135+
return hash((self.name, self.join_key))
136136

137137
def __eq__(self, other):
138138
if not isinstance(other, Entity):

sdk/python/feast/feature_service.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def __str__(self):
8585
return str(MessageToJson(self.to_proto()))
8686

8787
def __hash__(self):
88-
return hash((id(self), self.name))
88+
return hash((self.name))
8989

9090
def __eq__(self, other):
9191
if not isinstance(other, FeatureService):

sdk/python/feast/feature_store.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -533,25 +533,25 @@ def _plan(
533533
... batch_source=driver_hourly_stats,
534534
... )
535535
>>> registry_diff, infra_diff, new_infra = fs._plan(RepoContents(
536-
... data_sources={driver_hourly_stats},
537-
... feature_views={driver_hourly_stats_view},
538-
... on_demand_feature_views=set(),
539-
... request_feature_views=set(),
540-
... entities={driver},
541-
... feature_services=set())) # register entity and feature view
536+
... data_sources=[driver_hourly_stats],
537+
... feature_views=[driver_hourly_stats_view],
538+
... on_demand_feature_views=list(),
539+
... request_feature_views=list(),
540+
... entities=[driver],
541+
... feature_services=list())) # register entity and feature view
542542
"""
543543
# Validate and run inference on all the objects to be registered.
544544
self._validate_all_feature_views(
545-
list(desired_repo_contents.feature_views),
546-
list(desired_repo_contents.on_demand_feature_views),
547-
list(desired_repo_contents.request_feature_views),
545+
desired_repo_contents.feature_views,
546+
desired_repo_contents.on_demand_feature_views,
547+
desired_repo_contents.request_feature_views,
548548
)
549-
_validate_data_sources(list(desired_repo_contents.data_sources))
549+
_validate_data_sources(desired_repo_contents.data_sources)
550550
self._make_inferences(
551-
list(desired_repo_contents.data_sources),
552-
list(desired_repo_contents.entities),
553-
list(desired_repo_contents.feature_views),
554-
list(desired_repo_contents.on_demand_feature_views),
551+
desired_repo_contents.data_sources,
552+
desired_repo_contents.entities,
553+
desired_repo_contents.feature_views,
554+
desired_repo_contents.on_demand_feature_views,
555555
)
556556

557557
# Compute the desired difference between the current objects in the registry and

sdk/python/feast/feature_view.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ def _initialize_sources(self, name, batch_source, stream_source, source):
270270
self.batch_source = batch_source
271271
self.source = source
272272

273-
# Note: Python requires redefining hash in child classes that override __eq__
274273
def __hash__(self):
275274
return super().__hash__()
276275

@@ -298,19 +297,15 @@ def __eq__(self, other):
298297
return False
299298

300299
if (
301-
self.tags != other.tags
300+
sorted(self.entities) != sorted(other.entities)
302301
or self.ttl != other.ttl
303302
or self.online != other.online
303+
or self.batch_source != other.batch_source
304+
or self.stream_source != other.stream_source
305+
or self.schema != other.schema
304306
):
305307
return False
306308

307-
if sorted(self.entities) != sorted(other.entities):
308-
return False
309-
if self.batch_source != other.batch_source:
310-
return False
311-
if self.stream_source != other.stream_source:
312-
return False
313-
314309
return True
315310

316311
def ensure_valid(self):

sdk/python/feast/on_demand_feature_view.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -234,14 +234,19 @@ def __copy__(self):
234234
return fv
235235

236236
def __eq__(self, other):
237+
if not isinstance(other, OnDemandFeatureView):
238+
raise TypeError(
239+
"Comparisons should only involve OnDemandFeatureView class objects."
240+
)
241+
237242
if not super().__eq__(other):
238243
return False
239244

240245
if (
241-
not self.source_feature_view_projections
242-
== other.source_feature_view_projections
243-
or not self.source_request_sources == other.source_request_sources
244-
or not self.udf.__code__.co_code == other.udf.__code__.co_code
246+
self.source_feature_view_projections
247+
!= other.source_feature_view_projections
248+
or self.source_request_sources != other.source_request_sources
249+
or self.udf.__code__.co_code != other.udf.__code__.co_code
245250
):
246251
return False
247252

sdk/python/feast/registry.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from enum import Enum
1919
from pathlib import Path
2020
from threading import Lock
21-
from typing import Any, Dict, List, Optional, Set
21+
from typing import Any, Dict, List, Optional
2222
from urllib.parse import urlparse
2323

2424
import dill
@@ -98,7 +98,7 @@ def get_objects_from_registry(
9898
@staticmethod
9999
def get_objects_from_repo_contents(
100100
repo_contents: RepoContents,
101-
) -> Dict["FeastObjectType", Set[Any]]:
101+
) -> Dict["FeastObjectType", List[Any]]:
102102
return {
103103
FeastObjectType.DATA_SOURCE: repo_contents.data_sources,
104104
FeastObjectType.ENTITY: repo_contents.entities,

sdk/python/feast/repo_contents.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
from typing import NamedTuple, Set
14+
from typing import List, NamedTuple
1515

1616
from feast.data_source import DataSource
1717
from feast.entity import Entity
@@ -27,12 +27,12 @@ class RepoContents(NamedTuple):
2727
Represents the objects in a Feast feature repo.
2828
"""
2929

30-
data_sources: Set[DataSource]
31-
feature_views: Set[FeatureView]
32-
on_demand_feature_views: Set[OnDemandFeatureView]
33-
request_feature_views: Set[RequestFeatureView]
34-
entities: Set[Entity]
35-
feature_services: Set[FeatureService]
30+
data_sources: List[DataSource]
31+
feature_views: List[FeatureView]
32+
on_demand_feature_views: List[OnDemandFeatureView]
33+
request_feature_views: List[RequestFeatureView]
34+
entities: List[Entity]
35+
feature_services: List[FeatureService]
3636

3737
def to_registry_proto(self) -> RegistryProto:
3838
registry_proto = RegistryProto()

sdk/python/feast/repo_operations.py

+42-22
Original file line numberDiff line numberDiff line change
@@ -94,36 +94,56 @@ def get_repo_files(repo_root: Path) -> List[Path]:
9494

9595

9696
def parse_repo(repo_root: Path) -> RepoContents:
97-
"""Collect feature table definitions from feature repo"""
97+
"""
98+
Collects unique Feast object definitions from the given feature repo.
99+
100+
Specifically, if an object foo has already been added, bar will still be added if
101+
(bar == foo), but not if (bar is foo). This ensures that import statements will
102+
not result in duplicates, but defining two equal objects will.
103+
"""
98104
res = RepoContents(
99-
data_sources=set(),
100-
entities=set(),
101-
feature_views=set(),
102-
feature_services=set(),
103-
on_demand_feature_views=set(),
104-
request_feature_views=set(),
105+
data_sources=[],
106+
entities=[],
107+
feature_views=[],
108+
feature_services=[],
109+
on_demand_feature_views=[],
110+
request_feature_views=[],
105111
)
106112

107113
for repo_file in get_repo_files(repo_root):
108114
module_path = py_path_to_module(repo_file, repo_root)
109115
module = importlib.import_module(module_path)
110116
for attr_name in dir(module):
111117
obj = getattr(module, attr_name)
112-
if isinstance(obj, DataSource):
113-
res.data_sources.add(obj)
114-
if isinstance(obj, FeatureView):
115-
res.feature_views.add(obj)
116-
if isinstance(obj.stream_source, PushSource):
117-
res.data_sources.add(obj.stream_source.batch_source)
118-
elif isinstance(obj, Entity):
119-
res.entities.add(obj)
120-
elif isinstance(obj, FeatureService):
121-
res.feature_services.add(obj)
122-
elif isinstance(obj, OnDemandFeatureView):
123-
res.on_demand_feature_views.add(obj)
124-
elif isinstance(obj, RequestFeatureView):
125-
res.request_feature_views.add(obj)
126-
res.entities.add(DUMMY_ENTITY)
118+
if isinstance(obj, DataSource) and not any(
119+
(obj is ds) for ds in res.data_sources
120+
):
121+
res.data_sources.append(obj)
122+
if isinstance(obj, FeatureView) and not any(
123+
(obj is fv) for fv in res.feature_views
124+
):
125+
res.feature_views.append(obj)
126+
if isinstance(obj.stream_source, PushSource) and not any(
127+
(obj is ds) for ds in res.data_sources
128+
):
129+
res.data_sources.append(obj.stream_source.batch_source)
130+
elif isinstance(obj, Entity) and not any(
131+
(obj is entity) for entity in res.entities
132+
):
133+
res.entities.append(obj)
134+
elif isinstance(obj, FeatureService) and not any(
135+
(obj is fs) for fs in res.feature_services
136+
):
137+
res.feature_services.append(obj)
138+
elif isinstance(obj, OnDemandFeatureView) and not any(
139+
(obj is odfv) for odfv in res.on_demand_feature_views
140+
):
141+
res.on_demand_feature_views.append(obj)
142+
elif isinstance(obj, RequestFeatureView) and not any(
143+
(obj is rfv) for rfv in res.request_feature_views
144+
):
145+
res.request_feature_views.append(obj)
146+
res.entities.append(DUMMY_ENTITY)
127147
return res
128148

129149

0 commit comments

Comments
 (0)