Skip to content

Commit b54c31a

Browse files
adchia authored and achals committed
fix: Making a name for data sources not a breaking change (feast-dev#2379)
* fix: Making a name for data sources not a breaking change
  Signed-off-by: Danny Chiao <danny@tecton.ai>
* fix test
  Signed-off-by: Danny Chiao <danny@tecton.ai>
Signed-off-by: Achal Shah <achals@gmail.com>
1 parent a75bfad commit b54c31a

File tree

5 files changed

+43
-27
lines changed

5 files changed

+43
-27
lines changed

sdk/python/feast/infra/offline_stores/bigquery_source.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from feast import type_map
55
from feast.data_source import DataSource
6-
from feast.errors import DataSourceNoNameException, DataSourceNotFoundException
6+
from feast.errors import DataSourceNotFoundException
77
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
88
from feast.protos.feast.core.SavedDataset_pb2 import (
99
SavedDatasetStorage as SavedDatasetStorageProto,
@@ -16,19 +16,18 @@
1616
class BigQuerySource(DataSource):
1717
def __init__(
1818
self,
19-
name: Optional[str] = None,
2019
event_timestamp_column: Optional[str] = "",
2120
table: Optional[str] = None,
2221
table_ref: Optional[str] = None,
2322
created_timestamp_column: Optional[str] = "",
2423
field_mapping: Optional[Dict[str, str]] = None,
2524
date_partition_column: Optional[str] = "",
2625
query: Optional[str] = None,
26+
name: Optional[str] = None,
2727
):
2828
"""Create a BigQuerySource from an existing table or query.
2929
3030
Args:
31-
name (optional): Name for the source. Defaults to the table_ref if not specified.
3231
table (optional): The BigQuery table where features can be found.
3332
table_ref (optional): (Deprecated) The BigQuery table where features can be found.
3433
event_timestamp_column: Event timestamp column used for point in time joins of feature values.
@@ -37,13 +36,13 @@ def __init__(
3736
or view. Only used for feature columns, not entities or timestamp columns.
3837
date_partition_column (optional): Timestamp column used for partitioning.
3938
query (optional): SQL query to execute to generate data for this data source.
40-
39+
name (optional): Name for the source. Defaults to the table_ref if not specified.
4140
Example:
4241
>>> from feast import BigQuerySource
4342
>>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table")
4443
"""
4544
if table is None and table_ref is None and query is None:
46-
raise ValueError('No "table" argument provided.')
45+
raise ValueError('No "table" or "query" argument provided.')
4746
if not table and table_ref:
4847
warnings.warn(
4948
(
@@ -63,7 +62,12 @@ def __init__(
6362
elif table_ref:
6463
_name = table_ref
6564
else:
66-
raise DataSourceNoNameException()
65+
warnings.warn(
66+
(
67+
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
68+
),
69+
DeprecationWarning,
70+
)
6771

6872
super().__init__(
6973
_name if _name else "",

sdk/python/feast/infra/offline_stores/file_source.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -20,19 +20,18 @@ class FileSource(DataSource):
2020
def __init__(
2121
self,
2222
path: str,
23-
name: Optional[str] = "",
2423
event_timestamp_column: Optional[str] = "",
2524
file_format: Optional[FileFormat] = None,
2625
created_timestamp_column: Optional[str] = "",
2726
field_mapping: Optional[Dict[str, str]] = None,
2827
date_partition_column: Optional[str] = "",
2928
s3_endpoint_override: Optional[str] = None,
29+
name: Optional[str] = "",
3030
):
3131
"""Create a FileSource from a file containing feature data. Only Parquet format supported.
3232
3333
Args:
3434
35-
name (optional): Name for the file source. Defaults to the path.
3635
path: File path to file containing feature data. Must contain an event_timestamp column, entity columns and
3736
feature columns.
3837
event_timestamp_column: Event timestamp column used for point in time joins of feature values.
@@ -42,6 +41,7 @@ def __init__(
4241
or view. Only used for feature columns, not entities or timestamp columns.
4342
date_partition_column (optional): Timestamp column used for partitioning.
4443
s3_endpoint_override (optional): Overrides AWS S3 endpoint with custom S3 storage
44+
name (optional): Name for the file source. Defaults to the path.
4545
4646
Examples:
4747
>>> from feast import FileSource

sdk/python/feast/infra/offline_stores/redshift_source.py

+11-10
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,9 @@
1+
import warnings
12
from typing import Callable, Dict, Iterable, Optional, Tuple
23

34
from feast import type_map
45
from feast.data_source import DataSource
5-
from feast.errors import (
6-
DataSourceNoNameException,
7-
DataSourceNotFoundException,
8-
RedshiftCredentialsError,
9-
)
6+
from feast.errors import DataSourceNotFoundException, RedshiftCredentialsError
107
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
118
from feast.protos.feast.core.SavedDataset_pb2 import (
129
SavedDatasetStorage as SavedDatasetStorageProto,
@@ -19,20 +16,19 @@
1916
class RedshiftSource(DataSource):
2017
def __init__(
2118
self,
22-
name: Optional[str] = None,
2319
event_timestamp_column: Optional[str] = "",
2420
table: Optional[str] = None,
2521
schema: Optional[str] = None,
2622
created_timestamp_column: Optional[str] = "",
2723
field_mapping: Optional[Dict[str, str]] = None,
2824
date_partition_column: Optional[str] = "",
2925
query: Optional[str] = None,
26+
name: Optional[str] = None,
3027
):
3128
"""
3229
Creates a RedshiftSource object.
3330
3431
Args:
35-
name (optional): Name for the source. Defaults to the table_ref if not specified.
3632
event_timestamp_column (optional): Event timestamp column used for point in
3733
time joins of feature values.
3834
table (optional): Redshift table where the features are stored.
@@ -43,6 +39,7 @@ def __init__(
4339
source to column names in a feature table or view.
4440
date_partition_column (optional): Timestamp column used for partitioning.
4541
query (optional): The query to be executed to obtain the features.
42+
name (optional): Name for the source. Defaults to the table if not specified.
4643
"""
4744
if table is None and query is None:
4845
raise ValueError('No "table" argument provided.')
@@ -51,11 +48,15 @@ def __init__(
5148
if table:
5249
_name = table
5350
else:
54-
raise DataSourceNoNameException()
51+
warnings.warn(
52+
(
53+
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
54+
),
55+
DeprecationWarning,
56+
)
5557

56-
# TODO(adchia): figure out what to do if user uses the query to start
5758
super().__init__(
58-
_name,
59+
_name if _name else "",
5960
event_timestamp_column,
6061
created_timestamp_column,
6162
field_mapping,

sdk/python/feast/infra/offline_stores/snowflake_source.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1+
import warnings
12
from typing import Callable, Dict, Iterable, Optional, Tuple
23

34
from feast import type_map
45
from feast.data_source import DataSource
5-
from feast.errors import DataSourceNoNameException
66
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
77
from feast.protos.feast.core.SavedDataset_pb2 import (
88
SavedDatasetStorage as SavedDatasetStorageProto,
@@ -15,7 +15,6 @@
1515
class SnowflakeSource(DataSource):
1616
def __init__(
1717
self,
18-
name: Optional[str] = None,
1918
database: Optional[str] = None,
2019
schema: Optional[str] = None,
2120
table: Optional[str] = None,
@@ -24,12 +23,12 @@ def __init__(
2423
created_timestamp_column: Optional[str] = "",
2524
field_mapping: Optional[Dict[str, str]] = None,
2625
date_partition_column: Optional[str] = "",
26+
name: Optional[str] = None,
2727
):
2828
"""
2929
Creates a SnowflakeSource object.
3030
3131
Args:
32-
name (optional): Name for the source. Defaults to the table if not specified.
3332
database (optional): Snowflake database where the features are stored.
3433
schema (optional): Snowflake schema in which the table is located.
3534
table (optional): Snowflake table where the features are stored.
@@ -41,7 +40,7 @@ def __init__(
4140
field_mapping (optional): A dictionary mapping of column names in this data
4241
source to column names in a feature table or view.
4342
date_partition_column (optional): Timestamp column used for partitioning.
44-
43+
name (optional): Name for the source. Defaults to the table if not specified.
4544
"""
4645
if table is None and query is None:
4746
raise ValueError('No "table" argument provided.')
@@ -52,10 +51,15 @@ def __init__(
5251
if table:
5352
_name = table
5453
else:
55-
raise DataSourceNoNameException()
54+
warnings.warn(
55+
(
56+
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
57+
),
58+
DeprecationWarning,
59+
)
5660

5761
super().__init__(
58-
_name,
62+
_name if _name else "",
5963
event_timestamp_column,
6064
created_timestamp_column,
6165
field_mapping,

sdk/python/tests/integration/registration/test_inference.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
update_data_sources_with_inferred_event_timestamp_col,
2323
update_entities_with_inferred_types_from_feature_views,
2424
)
25+
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
26+
SparkSource,
27+
)
2528
from feast.on_demand_feature_view import on_demand_feature_view
2629
from tests.utils.data_source_utils import (
2730
prep_file_source,
@@ -83,7 +86,7 @@ def test_infer_datasource_names_file():
8386

8487
def test_infer_datasource_names_dwh():
8588
table = "project.table"
86-
dwh_classes = [BigQuerySource, RedshiftSource, SnowflakeSource]
89+
dwh_classes = [BigQuerySource, RedshiftSource, SnowflakeSource, SparkSource]
8790

8891
for dwh_class in dwh_classes:
8992
data_source = dwh_class(table=table)
@@ -98,9 +101,13 @@ def test_infer_datasource_names_dwh():
98101
assert data_source_with_query.name == source_name
99102

100103
# If we have a query and no name, only SparkSource throws an error; the other sources fall back to an empty name
101-
with pytest.raises(DataSourceNoNameException):
102-
print(f"Testing dwh {dwh_class}")
104+
if dwh_class == SparkSource:
105+
with pytest.raises(DataSourceNoNameException):
106+
print(f"Testing dwh {dwh_class}")
107+
data_source = dwh_class(query="test_query")
108+
else:
103109
data_source = dwh_class(query="test_query")
110+
assert data_source.name == ""
104111

105112

106113
@pytest.mark.integration

0 commit comments

Comments
 (0)