From 56d7ebc52f44a1699ecc4991952bfda9e3382619 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Nov 2022 10:39:12 -0500 Subject: [PATCH 1/2] Added convenience method for getting filesystem --- CHANGELOG.md | 2 ++ README.md | 64 ++++++++++++++++++++++++---------- planetary_computer/__init__.py | 4 +++ planetary_computer/_adlfs.py | 53 ++++++++++++++++++++++++++++ planetary_computer/sas.py | 7 +++- requirements-dev.txt | 3 +- scripts/cibuild | 2 +- scripts/test | 2 +- setup.cfg | 3 ++ tests/test_adlfs.py | 24 +++++++++++++ 10 files changed, 142 insertions(+), 22 deletions(-) create mode 100644 planetary_computer/_adlfs.py create mode 100644 tests/test_adlfs.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a097a46..fd2c2da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## New Features * `sign` now automatically retries failed HTTP requests. +* Added a convenience method `planetary_computer.get_container_client` for getting an authenticated ``azure.storage.blob.ContainerClient``. +* Added a convenience method `planetary_computer.get_adlfs_filesystem` for getting an authenticated ``adlfs.AzureBlobFileSystem``. # 0.4.7 diff --git a/README.md b/README.md index a0b7c46..4ae2d24 100644 --- a/README.md +++ b/README.md @@ -19,32 +19,31 @@ planetarycomputer configure Alternatively, a subscription key may be provided by specifying it in the `PC_SDK_SUBSCRIPTION_KEY` environment variable. A subcription key is not required for interacting with the service, however having one in place allows for less restricted rate limiting. -## Development - -The following steps may be followed in order to develop locally: - -```bash -## Create and activate venv -python3 -m venv env -source env/bin/activate +## Usage -## Install requirements -python3 -m pip install -r requirements-dev.txt +This library assists with signing Azure Blob Storage URLs. 
The `sign` function operates directly on an HREF string, as well as several [PySTAC](https://github.com/stac-utils/pystac) objects: `Asset`, `Item`, and `ItemCollection`. In addition, the `sign` function accepts a [STAC API Client](https://pystac-client.readthedocs.io/en/stable/) `ItemSearch`, which performs a search and returns the resulting `ItemCollection` with all assets signed. -## Install locally -pip install -e . +### Automatic signing -## Format code -./scripts/format +If you're using pystac-client we recommend you use its feature to [automatically sign results](https://pystac-client.readthedocs.io/en/stable/usage.html#automatically-modifying-results) with ``planetary_computer.sign_inplace``: -## Run tests -./scripts/test +```python +import planetary_computer +import pystac_client + +from pystac_client import Client + +api = Client.open( + 'https://planetarycomputer.microsoft.com/api/stac/v1', + modifier=planetary_computer.sign_inplace, +) ``` +Now all the results you get from that client will be signed. -## Usage +### Manual signing -This library currently assists with signing Azure Blob Storage URLs. The `sign` function operates directly on an HREF string, as well as several [PySTAC](https://github.com/stac-utils/pystac) objects: `Asset`, `Item`, and `ItemCollection`. In addition, the `sign` function accepts a [STAC API Client](https://github.com/stac-utils/pystac-client) `ItemSearch`, which performs a search and returns the resulting `ItemCollection` with all assets signed. The following example demonstrates these use cases: +Alternatively, you can manually call ``planetary_computer.sign`` on your results. ```python from pystac import Asset, Item, ItemCollection @@ -80,6 +79,35 @@ search = ItemSearch( signed_item_collection = pc.sign(search) ``` +### Convenience methods + +You'll occasionally need to interact with the Blob Storage container directly, rather than +using STAC items. 
We include two convenience methods for this: + +* `planetary_computer.get_container_client`: Get an [`azure.storage.blob.ContainerClient`](https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python) +* `planetary_computer.get_adlfs_filesystem`: Get an [`adlfs.AzureBlobFileSystem`](https://github.com/fsspec/adlfs) + +## Development + +The following steps may be followed in order to develop locally: + +```bash +## Create and activate venv +python3 -m venv env +source env/bin/activate + +## Install requirements +python3 -m pip install -r requirements-dev.txt + +## Install locally +pip install -e . + +## Format code +./scripts/format + +## Run tests +./scripts/test +``` ## Contributing diff --git a/planetary_computer/__init__.py b/planetary_computer/__init__.py index 7a18a24..dc6a51c 100644 --- a/planetary_computer/__init__.py +++ b/planetary_computer/__init__.py @@ -11,13 +11,17 @@ sign_item_collection, ) from planetary_computer.settings import set_subscription_key +from planetary_computer._adlfs import get_adlfs_filesystem, get_container_client from planetary_computer.version import __version__ __all__ = [ + "get_adlfs_filesystem", + "get_container_client", "set_subscription_key", "sign_asset", "sign_assets", + "sign_inplace", "sign_item_collection", "sign_item", "sign_url", diff --git a/planetary_computer/_adlfs.py b/planetary_computer/_adlfs.py new file mode 100644 index 0000000..f485947 --- /dev/null +++ b/planetary_computer/_adlfs.py @@ -0,0 +1,53 @@ +import typing + +import azure.storage.blob +from planetary_computer.sas import get_token + +if typing.TYPE_CHECKING: + import adlfs + + +def get_container_client( + account_name: str, container_name: str +) -> azure.storage.blob.ContainerClient: + """ + Get a :class:`azure.storage.blob.ContainerClient` with credentials. + + Args: + account_name (str): The storage account name. + container_name (str): The storage container name. 
+ Returns: + The :class:`azure.storage.blob.ContainerClient` with the short-lived SAS token + set as the credential. + """ + token = get_token(account_name, container_name).token + return azure.storage.blob.ContainerClient( + f"https://{account_name}.blob.core.windows.net", + container_name, + credential=token, + ) + + +def get_adlfs_filesystem( + account_name: str, container_name: str +) -> "adlfs.AzureBlobFileSystem": + """ + Get an :class:`adlfs.AzureBlobFileSystem` with credentials. + + Args: + account_name (str): The storage account name. + container_name (str): The storage container name. + Returns: + The :class:`adlfs.AzureBlobFileSystem` with the short-lived SAS token + set as the credential. + """ + try: + import adlfs + except ImportError as e: + raise ImportError( + "'planetary_computer.get_adlfs_filesystem' requires " + "the optional dependency 'adlfs'." + ) from e + token = get_token(account_name, container_name).token + fs = adlfs.AzureBlobFileSystem(account_name, credential=token) + return fs diff --git a/planetary_computer/sas.py b/planetary_computer/sas.py index 42775aa..9ac61d3 100644 --- a/planetary_computer/sas.py +++ b/planetary_computer/sas.py @@ -14,6 +14,7 @@ from pystac.utils import datetime_to_str from pystac.serialization.identify import identify_stac_object_type from pystac_client import ItemSearch +import pystac_client import urllib3.util.retry from planetary_computer.settings import Settings @@ -336,7 +337,11 @@ def _search_and_sign(search: ItemSearch, copy: bool = True) -> ItemCollection: a "msft:expiry" property is added to the Item properties indicating the earliest expiry time for any assets that were signed. 
""" - return sign(search.get_all_items()) + # Compare numerically: as strings, "0.10.0" sorts before "0.5.0". + version = tuple(map(int, pystac_client.__version__.split(".")[:2])) + if version >= (0, 5): + return sign(search.item_collection()) + return sign(search.get_all_items()) @sign.register(Collection) diff --git a/requirements-dev.txt b/requirements-dev.txt index 97cd710..f7e8cc4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,4 +3,5 @@ flake8==4.0.1 ipdb==0.13.9 mypy==0.961 types-requests==2.28.1 -setuptools==63.1.0 \ No newline at end of file +setuptools==63.1.0 +pytest diff --git a/scripts/cibuild b/scripts/cibuild index 82a8bcc..edaeec5 100755 --- a/scripts/cibuild +++ b/scripts/cibuild @@ -20,7 +20,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then # Install/upgrade dependencies python -m pip install --upgrade pip pip install -r requirements-dev.txt - pip install -e . + pip install -e .[adlfs] ./scripts/test fi diff --git a/scripts/test b/scripts/test index 66b92ee..0dbafe7 100755 --- a/scripts/test +++ b/scripts/test @@ -28,6 +28,6 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then flake8 planetary_computer tests echo "Running unit tests..." 
- python -m unittest discover tests + pytest -vs tests fi fi diff --git a/setup.cfg b/setup.cfg index d934a93..7e2d0dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,6 +19,9 @@ install_requires = pytz>=2020.5 requests>=2.25.1 +[options.extras_require] +adlfs = adlfs + [options.entry_points] console_scripts = planetarycomputer = planetary_computer.scripts.cli:app diff --git a/tests/test_adlfs.py b/tests/test_adlfs.py new file mode 100644 index 0000000..1061d74 --- /dev/null +++ b/tests/test_adlfs.py @@ -0,0 +1,24 @@ +import sys + +import azure.storage.blob +import pytest + +import planetary_computer + + +def test_get_adlfs_filesystem_raises(monkeypatch): + monkeypatch.setitem(sys.modules, "adlfs", None) + with pytest.raises(ImportError): + planetary_computer.get_adlfs_filesystem("nrel", "oedi") + + +def test_get_adlfs_filesystem(): + fs = planetary_computer.get_adlfs_filesystem("nrel", "oedi") + assert fs.account_url == "https://nrel.blob.core.windows.net" + assert fs.credential is not None + + +def test_get_container_client(): + cc = planetary_computer.get_container_client("nrel", "oedi") + assert cc.primary_endpoint.startswith("https://nrel.blob.core.windows.net/oedi") + assert isinstance(cc, azure.storage.blob.ContainerClient) From 5fbb6571747e8d0eef83e19f5a902d33251ce998 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Nov 2022 08:53:11 -0500 Subject: [PATCH 2/2] bump ci version --- .github/workflows/continuous-integration.yml | 13 +++---------- tests/test_adlfs.py | 7 ++++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 3979820..807cb41 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -12,21 +12,14 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v2 + - 
uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Cache dependencies - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements-dev.txt') }} - restore-keys: pip- - - name: Execute linters and test suites run: ./scripts/cibuild diff --git a/tests/test_adlfs.py b/tests/test_adlfs.py index 1061d74..978e4a2 100644 --- a/tests/test_adlfs.py +++ b/tests/test_adlfs.py @@ -1,4 +1,5 @@ import sys +from typing import Any import azure.storage.blob import pytest @@ -6,19 +7,19 @@ import planetary_computer -def test_get_adlfs_filesystem_raises(monkeypatch): +def test_get_adlfs_filesystem_raises(monkeypatch: Any) -> None: monkeypatch.setitem(sys.modules, "adlfs", None) with pytest.raises(ImportError): planetary_computer.get_adlfs_filesystem("nrel", "oedi") -def test_get_adlfs_filesystem(): +def test_get_adlfs_filesystem() -> None: fs = planetary_computer.get_adlfs_filesystem("nrel", "oedi") assert fs.account_url == "https://nrel.blob.core.windows.net" assert fs.credential is not None -def test_get_container_client(): +def test_get_container_client() -> None: cc = planetary_computer.get_container_client("nrel", "oedi") assert cc.primary_endpoint.startswith("https://nrel.blob.core.windows.net/oedi") assert isinstance(cc, azure.storage.blob.ContainerClient)