From 33339e2c8933bb3b989b4052ed1b3d308624f2a0 Mon Sep 17 00:00:00 2001 From: Jordan Wolinsky Date: Mon, 8 Aug 2022 14:40:05 -0400 Subject: [PATCH] Expose catalog_name in athena.py (#5548) * Expose catalog_name in the SQLAlchemy URI that is passed to PyAthena Co-authored-by: Ravindra Lanka Co-authored-by: Shirshanka Das --- .../src/datahub/ingestion/source/sql/athena.py | 14 ++++++++++++++ .../tests/unit/test_athena_source.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 93bea0bd92b414..10396c1e3c4aed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -42,12 +42,23 @@ class AthenaConfig(SQLAlchemyConfig): aws_region: str = pydantic.Field( description="Aws region where your Athena database is located" ) + aws_role_arn: Optional[str] = pydantic.Field( + default=None, + description="AWS Role arn for Pyathena to assume in its connection", + ) + aws_role_assumption_duration: int = pydantic.Field( + default=3600, + description="Duration to assume the AWS Role for. 
Maximum of 43200 (12 hours)", + ) s3_staging_dir: str = pydantic.Field( description="Staging s3 location where the Athena query results will be stored" ) work_group: str = pydantic.Field( description="The name of your Amazon Athena Workgroups" ) + catalog_name: str = pydantic.Field( + default="awsdatacatalog", description="Athena Catalog Name" + ) include_views = False # not supported for Athena @@ -61,6 +72,9 @@ def get_sql_alchemy_url(self): uri_opts={ "s3_staging_dir": self.s3_staging_dir, "work_group": self.work_group, + "catalog_name": self.catalog_name, + "role_arn": self.aws_role_arn, + "duration_seconds": str(self.aws_role_assumption_duration), }, ) diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py index 9bbdebc9866071..317c26286438cb 100644 --- a/metadata-ingestion/tests/unit/test_athena_source.py +++ b/metadata-ingestion/tests/unit/test_athena_source.py @@ -25,7 +25,7 @@ def test_athena_uri(): ) assert ( config.get_sql_alchemy_url() - == "awsathena+rest://@athena.us-west-1.amazonaws.com:443/?s3_staging_dir=s3%3A%2F%2Fsample-staging-dir%2F&work_group=test-workgroup" + == "awsathena+rest://@athena.us-west-1.amazonaws.com:443/?s3_staging_dir=s3%3A%2F%2Fsample-staging-dir%2F&work_group=test-workgroup&catalog_name=awsdatacatalog&duration_seconds=3600" )