From e19b1c542275dd7f2b18f5bff317e2c2309da038 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 3 Feb 2025 17:07:17 +1100 Subject: [PATCH 1/5] c --- .../polars/io/cloud/credential_provider.py | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index 0477f3835c84..0d8fb38a4104 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -184,13 +184,14 @@ def __init__( *, scopes: list[str] | None = None, tenant_id: str | None = None, + credentials: Any | None = None, _storage_account: str | None = None, _verbose: bool = False, ) -> None: """ Initialize a credential provider for Microsoft Azure. - This uses `azure.identity.DefaultAzureCredential()`. + By default, this uses `azure.identity.DefaultAzureCredential()`. Parameters ---------- @@ -198,20 +199,35 @@ def __init__( Scopes to pass to `get_token` tenant_id Azure tenant ID. + credentials + Optionally pass an instantiated Azure credential class to use (e.g. + azure.identity.DefaultAzureCredential). The credential class must + have a `get_token()` method. """ msg = "`CredentialProviderAzure` functionality is considered unstable" issue_unstable_warning(msg) self.account_name = _storage_account - self.tenant_id = tenant_id self.scopes = ( scopes if scopes is not None else ["https://storage.azure.com/.default"] ) + self.tenant_id = tenant_id + self.credentials = credentials self._verbose = _verbose + if credentials is not None: + # If the user passes a credentials class, we just need to ensure it + # has a `get_token()` method. + if not hasattr(credentials, "get_token"): + msg = ( + f"the provided `credentials` object {credentials!r} does " + "not have a `get_token()` method." + ) + raise ValueError(msg) + # We don't need the module if we are permitted and able to retrieve the # account key from the Azure CLI. - if self._try_get_azure_storage_account_credentials_if_permitted() is None: + elif self._try_get_azure_storage_account_credentials_if_permitted() is None: self._ensure_module_availability() if self._verbose: @@ -233,9 +249,12 @@ def __call__(self) -> CredentialProviderFunctionReturn: return v # Done like this to bypass mypy, we don't have stubs for azure.identity - credential = importlib.import_module("azure.identity").__dict__[ - "DefaultAzureCredential" - ]() + credential = ( + self.credentials + or importlib.import_module("azure.identity").__dict__[ + "DefaultAzureCredential" + ]() + ) token = credential.get_token(*self.scopes, tenant_id=self.tenant_id) return { From 7cc03fa1b8ede38c89170ca7e18a5d812c41322b Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 3 Feb 2025 17:07:33 +1100 Subject: [PATCH 2/5] c --- py-polars/polars/io/cloud/credential_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index 0d8fb38a4104..15b756486c66 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -201,7 +201,7 @@ def __init__( Azure tenant ID. credentials Optionally pass an instantiated Azure credential class to use (e.g. - azure.identity.DefaultAzureCredential). The credential class must + `azure.identity.DefaultAzureCredential`). The credential class must have a `get_token()` method. """ msg = "`CredentialProviderAzure` functionality is considered unstable" From d290bdec17ff7954300abc9143da0e0875e90c4e Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 3 Feb 2025 17:18:21 +1100 Subject: [PATCH 3/5] c --- docs/source/_build/API_REFERENCE_LINKS.yml | 3 +++ .../src/python/user-guide/io/cloud-storage.py | 24 +++++++++++++++++++ .../src/rust/user-guide/io/cloud-storage.rs | 3 +++ docs/source/user-guide/io/cloud-storage.md | 3 ++- 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/source/_build/API_REFERENCE_LINKS.yml b/docs/source/_build/API_REFERENCE_LINKS.yml index 6a0ce39d1044..f86b46d2bf8e 100644 --- a/docs/source/_build/API_REFERENCE_LINKS.yml +++ b/docs/source/_build/API_REFERENCE_LINKS.yml @@ -12,6 +12,9 @@ python: concat_list: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.concat_list.html concat_str: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.concat_str.html Config: https://docs.pola.rs/api/python/stable/reference/config.html + CredentialProviderAWS: https://docs.pola.rs/api/python/stable/reference/api/polars.CredentialProviderAWS.html + CredentialProviderAzure: https://docs.pola.rs/api/python/stable/reference/api/polars.CredentialProviderAzure.html + CredentialProviderGCP: https://docs.pola.rs/api/python/stable/reference/api/polars.CredentialProviderGCP.html cs.by_name: https://docs.pola.rs/api/python/stable/reference/selectors.html#polars.selectors.by_name cs.contains: https://docs.pola.rs/api/python/stable/reference/selectors.html#polars.selectors.contains cs.first: https://docs.pola.rs/api/python/stable/reference/selectors.html#polars.selectors.first diff --git a/docs/source/src/python/user-guide/io/cloud-storage.py b/docs/source/src/python/user-guide/io/cloud-storage.py index 12b02df28e61..3683d7cbaea1 100644 --- a/docs/source/src/python/user-guide/io/cloud-storage.py +++ b/docs/source/src/python/user-guide/io/cloud-storage.py @@ -63,6 +63,30 @@ def get_credentials() -> pl.CredentialProviderFunctionReturn: df = lf.collect() # --8<-- [end:credential_provider_custom_func] +# --8<-- [start:credential_provider_custom_func_azure] +def credential_provider(): + credential = DefaultAzureCredential(exclude_managed_identity_credential=True) + token = credential.get_token("https://storage.azure.com/.default") + + return {"bearer_token": token.token}, token.expires_on + + +pl.scan_parquet( + "abfss://...@.../...", + credential_provider=credential_provider, +) + +# Note that for the above case, this shortcut is also available: + +pl.scan_parquet( + "abfss://...@.../...", + credential_provider=pl.CredentialProvierAzure( + credentials=DefaultAzureCredential(exclude_managed_identity_credential=True) + ), +) + +# --8<-- [end:credential_provider_custom_func_azure] + # --8<-- [start:scan_pyarrow_dataset] import polars as pl import pyarrow.dataset as ds diff --git a/docs/source/src/rust/user-guide/io/cloud-storage.rs b/docs/source/src/rust/user-guide/io/cloud-storage.rs index 2df882a39c00..5de0b2bb7d63 100644 --- a/docs/source/src/rust/user-guide/io/cloud-storage.rs +++ b/docs/source/src/rust/user-guide/io/cloud-storage.rs @@ -39,6 +39,9 @@ async fn main() { // --8<-- [start:credential_provider_custom_func] // --8<-- [end:credential_provider_custom_func] +// --8<-- [start:credential_provider_custom_func_azure] +// --8<-- [end:credential_provider_custom_func_azure] + // --8<-- [start:scan_pyarrow_dataset] // --8<-- [end:scan_pyarrow_dataset] diff --git a/docs/source/user-guide/io/cloud-storage.md b/docs/source/user-guide/io/cloud-storage.md index f12ad4576ebd..767de6f4710d 100644 --- a/docs/source/user-guide/io/cloud-storage.md +++ b/docs/source/user-guide/io/cloud-storage.md @@ -51,7 +51,7 @@ use for authentication. This can be done in a few ways: functionality. For example, `pl.CredentialProviderAWS` supports selecting AWS profiles, as well as assuming an IAM role: -{{code_block('user-guide/io/cloud-storage','credential_provider_class',['scan_parquet'])}} +{{code_block('user-guide/io/cloud-storage','credential_provider_class',['scan_parquet', 'CredentialProviderAWS'])}} ### Using a custom `credential_provider` function @@ -59,6 +59,7 @@ use for authentication. This can be done in a few ways: cases a Python function can be provided for Polars to use to retrieve credentials: {{code_block('user-guide/io/cloud-storage','credential_provider_custom_func',['scan_parquet'])}} +{{code_block('user-guide/io/cloud-storage','credential_provider_custom_func_azure',['scan_parquet', 'CredentialProviderAzure'])}} ## Scanning with PyArrow From ad0315223ff453760e8a4e8a2f668ed12dc6b3b7 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 3 Feb 2025 17:21:31 +1100 Subject: [PATCH 4/5] c --- docs/source/user-guide/io/cloud-storage.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/user-guide/io/cloud-storage.md b/docs/source/user-guide/io/cloud-storage.md index 767de6f4710d..03f79e0ca9ae 100644 --- a/docs/source/user-guide/io/cloud-storage.md +++ b/docs/source/user-guide/io/cloud-storage.md @@ -59,6 +59,9 @@ use for authentication. This can be done in a few ways: cases a Python function can be provided for Polars to use to retrieve credentials: {{code_block('user-guide/io/cloud-storage','credential_provider_custom_func',['scan_parquet'])}} + +- Example for Azure: + {{code_block('user-guide/io/cloud-storage','credential_provider_custom_func_azure',['scan_parquet', 'CredentialProviderAzure'])}} ## Scanning with PyArrow From 65985df7c8160374e0f67f2cb64249f4e41116cb Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 3 Feb 2025 17:23:25 +1100 Subject: [PATCH 5/5] c --- docs/source/src/python/user-guide/io/cloud-storage.py | 2 +- docs/source/user-guide/io/cloud-storage.md | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/src/python/user-guide/io/cloud-storage.py b/docs/source/src/python/user-guide/io/cloud-storage.py index 3683d7cbaea1..07be82588a3b 100644 --- a/docs/source/src/python/user-guide/io/cloud-storage.py +++ b/docs/source/src/python/user-guide/io/cloud-storage.py @@ -80,7 +80,7 @@ def credential_provider(): pl.scan_parquet( "abfss://...@.../...", - credential_provider=pl.CredentialProvierAzure( + credential_provider=pl.CredentialProviderAzure( credentials=DefaultAzureCredential(exclude_managed_identity_credential=True) ), ) diff --git a/docs/source/user-guide/io/cloud-storage.md b/docs/source/user-guide/io/cloud-storage.md index 03f79e0ca9ae..476111577ff1 100644 --- a/docs/source/user-guide/io/cloud-storage.md +++ b/docs/source/user-guide/io/cloud-storage.md @@ -51,7 +51,8 @@ use for authentication. This can be done in a few ways: functionality. For example, `pl.CredentialProviderAWS` supports selecting AWS profiles, as well as assuming an IAM role: -{{code_block('user-guide/io/cloud-storage','credential_provider_class',['scan_parquet', 'CredentialProviderAWS'])}} +{{code_block('user-guide/io/cloud-storage','credential_provider_class',['scan_parquet', +'CredentialProviderAWS'])}} ### Using a custom `credential_provider` function @@ -62,7 +63,8 @@ use for authentication. This can be done in a few ways: - Example for Azure: -{{code_block('user-guide/io/cloud-storage','credential_provider_custom_func_azure',['scan_parquet', 'CredentialProviderAzure'])}} +{{code_block('user-guide/io/cloud-storage','credential_provider_custom_func_azure',['scan_parquet', +'CredentialProviderAzure'])}} ## Scanning with PyArrow