Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Remove date partition column field from datasources that don't s… #2478

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions sdk/python/feast/infra/offline_stores/bigquery_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(
table_ref: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
query: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = "",
Expand All @@ -37,7 +37,7 @@ def __init__(
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
or view. Only used for feature columns, not entities or timestamp columns.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
query (optional): SQL query to execute to generate data for this data source.
name (optional): Name for the source. Defaults to the table_ref if not specified.
description (optional): A human-readable description.
Expand All @@ -61,6 +61,15 @@ def __init__(
table = table_ref
self.bigquery_options = BigQueryOptions(table_ref=table, query=query)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for BigQuery sources. "
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

# If no name, use the table_ref as the default name
_name = name
if not _name:
Expand All @@ -78,10 +87,9 @@ def __init__(

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -128,7 +136,6 @@ def from_proto(data_source: DataSourceProto):
table_ref=data_source.bigquery_options.table_ref,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.bigquery_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand All @@ -148,7 +155,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,21 @@ def __init__(
_name = table
else:
raise DataSourceNoNameException()

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Spark sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
name=_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -130,7 +139,6 @@ def from_proto(data_source: DataSourceProto) -> Any:
file_format=spark_options.file_format,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
Expand All @@ -149,7 +157,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
20 changes: 13 additions & 7 deletions sdk/python/feast/infra/offline_stores/file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,20 @@ def __init__(
s3_endpoint_override=s3_endpoint_override,
)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for File sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
name if name else path,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
name=name if name else path,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -114,7 +122,6 @@ def from_proto(data_source: DataSourceProto):
path=data_source.file_options.uri,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
s3_endpoint_override=data_source.file_options.s3_endpoint_override,
description=data_source.description,
tags=dict(data_source.tags),
Expand All @@ -134,7 +141,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
21 changes: 13 additions & 8 deletions sdk/python/feast/infra/offline_stores/redshift_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(
schema: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
query: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = "",
Expand All @@ -40,7 +40,7 @@ def __init__(
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
query (optional): The query to be executed to obtain the features.
name (optional): Name for the source. Defaults to the table_ref if not specified.
description (optional): A human-readable description.
Expand Down Expand Up @@ -68,13 +68,20 @@ def __init__(
),
DeprecationWarning,
)
if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Redshift sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand All @@ -97,7 +104,6 @@ def from_proto(data_source: DataSourceProto):
schema=data_source.redshift_options.schema,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.redshift_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand Down Expand Up @@ -160,7 +166,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
22 changes: 14 additions & 8 deletions sdk/python/feast/infra/offline_stores/snowflake_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def __init__(
table: Optional[str] = None,
query: Optional[str] = None,
event_timestamp_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
name: Optional[str] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
Expand All @@ -42,7 +42,7 @@ def __init__(
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
name (optional): Name for the source. Defaults to the table if not specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
Expand Down Expand Up @@ -72,12 +72,20 @@ def __init__(
DeprecationWarning,
)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Snowflake sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand All @@ -101,7 +109,6 @@ def from_proto(data_source: DataSourceProto):
table=data_source.snowflake_options.table,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.snowflake_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand Down Expand Up @@ -170,7 +177,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def create_data_source(
table_ref=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def create_data_source(
path=f"{f.name}",
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down Expand Up @@ -130,7 +129,6 @@ def create_data_source(
path=f"s3://{self.bucket}/{filename}",
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
s3_endpoint_override=f"http://{host}:{port}",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
# maps certain column names to other names
field_mapping=field_mapping or {"ts_1": "ts"},
)
Expand Down
4 changes: 0 additions & 4 deletions sdk/python/tests/integration/registration/test_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ def test_apply_feature_view_success(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down Expand Up @@ -243,7 +242,6 @@ def test_modify_feature_views_success(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

request_source = RequestDataSource(
Expand Down Expand Up @@ -364,7 +362,6 @@ def test_apply_feature_view_integration(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down Expand Up @@ -440,7 +437,6 @@ def test_apply_data_source(test_registry: Registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down