From 54faa5667305b4e48ca619a8a66a64d67f10574c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 31 Mar 2022 18:48:19 -0700 Subject: [PATCH 1/3] Fix Signed-off-by: Kevin Zhang --- protos/feast/core/DataSource.proto | 3 ++ .../infra/offline_stores/redshift_source.py | 43 ++++++++++++++++--- sdk/python/feast/templates/aws/bootstrap.py | 5 ++- sdk/python/feast/templates/aws/driver_repo.py | 2 + .../universal/data_sources/redshift.py | 1 + 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/protos/feast/core/DataSource.proto b/protos/feast/core/DataSource.proto index 8fe84274a1f..7ff5d608269 100644 --- a/protos/feast/core/DataSource.proto +++ b/protos/feast/core/DataSource.proto @@ -145,6 +145,9 @@ message DataSource { // Redshift schema name string schema = 3; + + // Redshift database name + string database = 4; } // Defines options for DataSource that sources features from a Snowflake Query diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index b2fa143a860..229da289341 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -27,6 +27,7 @@ def __init__( description: Optional[str] = "", tags: Optional[Dict[str, str]] = None, owner: Optional[str] = "", + database: Optional[str] = "", ): """ Creates a RedshiftSource object. @@ -47,11 +48,12 @@ def __init__( tags (optional): A dictionary of key-value pairs to store arbitrary metadata. owner (optional): The owner of the redshift source, typically the email of the primary maintainer. + database (optional): The Redshift database name. """ # The default Redshift schema is named "public". _schema = "public" if table and not schema else schema self.redshift_options = RedshiftOptions( - table=table, schema=_schema, query=query + table=table, schema=_schema, query=query, database=database ) if table is None and query is None: @@ -102,6 +104,7 @@ def from_proto(data_source: DataSourceProto): description=data_source.description, tags=dict(data_source.tags), owner=data_source.owner, + database=data_source.redshift_options.database, ) # Note: Python requires redefining hash in child classes that override __eq__ @@ -119,6 +122,7 @@ def __eq__(self, other): and self.redshift_options.table == other.redshift_options.table and self.redshift_options.schema == other.redshift_options.schema and self.redshift_options.query == other.redshift_options.query + and self.redshift_options.database == other.redshift_options.database and self.event_timestamp_column == other.event_timestamp_column and self.created_timestamp_column == other.created_timestamp_column and self.field_mapping == other.field_mapping @@ -139,9 +143,15 @@ def schema(self): @property def query(self): - """Returns the Redshift options of this Redshift source.""" + """Returns the Redshift query of this Redshift source.""" return self.redshift_options.query + @property + def database(self): + """Returns the Redshift database of this Redshift source.""" + return self.redshift_options.database + + def to_proto(self) -> DataSourceProto: """ Converts a RedshiftSource object to its protobuf representation. @@ -197,12 +207,19 @@ def get_table_column_names_and_types( assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) client = aws_utils.get_redshift_data_client(config.offline_store.region) - + if not self.database: + warnings.warn( + ( + "You are using redshift database name from your offline store config. Feast is deprecating this parameter soon in Feast 0.23." + "Please pass database name to RedshiftSource in your driver python file instead. " + ), + DeprecationWarning, + ) if self.table is not None: try: table = client.describe_table( ClusterIdentifier=config.offline_store.cluster_id, - Database=config.offline_store.database, + Database=(self.database if self.database else config.offline_store.database), DbUser=config.offline_store.user, Table=self.table, Schema=self.schema, @@ -221,7 +238,7 @@ def get_table_column_names_and_types( statement_id = aws_utils.execute_redshift_statement( client, config.offline_store.cluster_id, - config.offline_store.database, + self.database if self.database else config.offline_store.database, config.offline_store.user, f"SELECT * FROM ({self.query}) LIMIT 1", ) @@ -238,11 +255,12 @@ class RedshiftOptions: """ def __init__( - self, table: Optional[str], schema: Optional[str], query: Optional[str] + self, table: Optional[str], schema: Optional[str], query: Optional[str], database: Optional[str] ): self._table = table self._schema = schema self._query = query + self._database = database @property def query(self): @@ -274,6 +292,16 @@ def schema(self, schema): """Sets the schema of this Redshift table.""" self._schema = schema + @property + def database(self): + """Returns the schema name of this Redshift table.""" + return self._database + + @database.setter + def database(self, database): + """Sets the database name of this Redshift table.""" + self._database = database + @classmethod def from_proto(cls, redshift_options_proto: DataSourceProto.RedshiftOptions): """ @@ -289,6 +317,7 @@ def from_proto(cls, redshift_options_proto: DataSourceProto.RedshiftOptions): table=redshift_options_proto.table, schema=redshift_options_proto.schema, query=redshift_options_proto.query, + database=redshift_options_proto.database, ) return redshift_options @@ -301,7 +330,7 @@ def to_proto(self) -> DataSourceProto.RedshiftOptions: A RedshiftOptionsProto protobuf. """ redshift_options_proto = DataSourceProto.RedshiftOptions( - table=self.table, schema=self.schema, query=self.query, + table=self.table, schema=self.schema, query=self.query, database=self.database ) return redshift_options_proto diff --git a/sdk/python/feast/templates/aws/bootstrap.py b/sdk/python/feast/templates/aws/bootstrap.py index 2b64a27e627..9ab032d6bff 100644 --- a/sdk/python/feast/templates/aws/bootstrap.py +++ b/sdk/python/feast/templates/aws/bootstrap.py @@ -53,14 +53,17 @@ def bootstrap(): repo_path = pathlib.Path(__file__).parent.absolute() config_file = repo_path / "feature_store.yaml" + driver_file = repo_path / "driver_repo.py" replace_str_in_file(config_file, "%AWS_REGION%", aws_region) replace_str_in_file(config_file, "%REDSHIFT_CLUSTER_ID%", cluster_id) replace_str_in_file(config_file, "%REDSHIFT_DATABASE%", database) + replace_str_in_file(driver_file, "%REDSHIFT_DATABASE%", database) replace_str_in_file(config_file, "%REDSHIFT_USER%", user) - replace_str_in_file( + replace_str_in_file(driver_file, config_file, "%REDSHIFT_S3_STAGING_LOCATION%", s3_staging_location ) + replace_str_in_file(config_file, ) replace_str_in_file(config_file, "%REDSHIFT_IAM_ROLE%", iam_role) diff --git a/sdk/python/feast/templates/aws/driver_repo.py b/sdk/python/feast/templates/aws/driver_repo.py index 10a0dbb638b..e72d24ae5ec 100644 --- a/sdk/python/feast/templates/aws/driver_repo.py +++ b/sdk/python/feast/templates/aws/driver_repo.py @@ -27,6 +27,8 @@ # The (optional) created timestamp is used to ensure there are no duplicate # feature rows in the offline store or when building training datasets created_timestamp_column="created", + # Database to redshift source. + database="%REDSHIFT_DATABASE%" ) # Feature views are a grouping based on how features are stored in either the diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index 49b31263cf9..7fea5f22044 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -65,6 +65,7 @@ def create_data_source( created_timestamp_column=created_timestamp_column, date_partition_column="", field_mapping=field_mapping or {"ts_1": "ts"}, + database=self.offline_store_config.database, ) def create_saved_dataset_destination(self) -> SavedDatasetRedshiftStorage: From c0e0597d9ec262f3ad0ee188d655260893137c3d Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 1 Apr 2022 12:32:18 -0700 Subject: [PATCH 2/3] Fix Signed-off-by: Kevin Zhang --- .../infra/offline_stores/redshift_source.py | 20 ++++++++++++++----- sdk/python/feast/templates/aws/bootstrap.py | 6 +++--- sdk/python/feast/templates/aws/driver_repo.py | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index 229da289341..6c34aa5f983 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -151,7 +151,6 @@ def database(self): """Returns the Redshift database of this Redshift source.""" return self.redshift_options.database - def to_proto(self) -> DataSourceProto: """ Converts a RedshiftSource object to its protobuf representation. @@ -219,7 +218,11 @@ def get_table_column_names_and_types( try: table = client.describe_table( ClusterIdentifier=config.offline_store.cluster_id, - Database=(self.database if self.database else config.offline_store.database), + Database=( + self.database + if self.database + else config.offline_store.database + ), DbUser=config.offline_store.user, Table=self.table, Schema=self.schema, @@ -255,7 +258,11 @@ class RedshiftOptions: """ def __init__( - self, table: Optional[str], schema: Optional[str], query: Optional[str], database: Optional[str] + self, + table: Optional[str], + schema: Optional[str], + query: Optional[str], + database: Optional[str], ): self._table = table self._schema = schema @@ -330,7 +337,10 @@ def to_proto(self) -> DataSourceProto.RedshiftOptions: A RedshiftOptionsProto protobuf. """ redshift_options_proto = DataSourceProto.RedshiftOptions( - table=self.table, schema=self.schema, query=self.query, database=self.database + table=self.table, + schema=self.schema, + query=self.query, + database=self.database, ) return redshift_options_proto @@ -343,7 +353,7 @@ class SavedDatasetRedshiftStorage(SavedDatasetStorage): def __init__(self, table_ref: str): self.redshift_options = RedshiftOptions( - table=table_ref, schema=None, query=None + table=table_ref, schema=None, query=None, database=None ) @staticmethod diff --git a/sdk/python/feast/templates/aws/bootstrap.py b/sdk/python/feast/templates/aws/bootstrap.py index 9ab032d6bff..80c2480d254 100644 --- a/sdk/python/feast/templates/aws/bootstrap.py +++ b/sdk/python/feast/templates/aws/bootstrap.py @@ -60,10 +60,10 @@ def bootstrap(): replace_str_in_file(config_file, "%REDSHIFT_DATABASE%", database) replace_str_in_file(driver_file, "%REDSHIFT_DATABASE%", database) replace_str_in_file(config_file, "%REDSHIFT_USER%", user) - replace_str_in_file(driver_file, - config_file, "%REDSHIFT_S3_STAGING_LOCATION%", s3_staging_location + replace_str_in_file( + driver_file, config_file, "%REDSHIFT_S3_STAGING_LOCATION%", s3_staging_location ) - replace_str_in_file(config_file, ) + replace_str_in_file(config_file,) replace_str_in_file(config_file, "%REDSHIFT_IAM_ROLE%", iam_role) diff --git a/sdk/python/feast/templates/aws/driver_repo.py b/sdk/python/feast/templates/aws/driver_repo.py index e72d24ae5ec..69cc21a647c 100644 --- a/sdk/python/feast/templates/aws/driver_repo.py +++ b/sdk/python/feast/templates/aws/driver_repo.py @@ -28,7 +28,7 @@ # feature rows in the offline store or when building training datasets created_timestamp_column="created", # Database to redshift source. - database="%REDSHIFT_DATABASE%" + database="%REDSHIFT_DATABASE%", ) # Feature views are a grouping based on how features are stored in either the From 0dca0ecaf59dab7d8d9a413b336a0014b096a4e7 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 1 Apr 2022 12:41:46 -0700 Subject: [PATCH 3/3] Remove warning Signed-off-by: Kevin Zhang --- sdk/python/feast/infra/offline_stores/redshift_source.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index 6c34aa5f983..d41b0439378 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -206,14 +206,6 @@ def get_table_column_names_and_types( assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) client = aws_utils.get_redshift_data_client(config.offline_store.region) - if not self.database: - warnings.warn( - ( - "You are using redshift database name from your offline store config. Feast is deprecating this parameter soon in Feast 0.23." - "Please pass database name to RedshiftSource in your driver python file instead. " - ), - DeprecationWarning, - ) if self.table is not None: try: table = client.describe_table(