Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow other fsspec protocols than local and s3 #126

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions virtualizarr/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import contextlib
import pathlib

import fsspec
import fsspec.implementations.local
import fsspec.implementations.memory
import pytest
import xarray as xr

from virtualizarr.utils import _fsspec_openfile_from_filepath


@pytest.fixture
def dataset() -> xr.Dataset:
    """Provide a tiny dataset for the file-opening tests.

    The dataset holds a single variable ``x`` with values ``[10, 20, 30]``
    along dimension ``a``, whose coordinate values are ``[0, 1, 2]``.
    """
    x_values = xr.DataArray([10, 20, 30], dims="a", coords={"a": [0, 1, 2]})
    return xr.Dataset({"x": x_values})


def test_fsspec_openfile_from_path(tmp_path: pathlib.Path, dataset: xr.Dataset) -> None:
    """Check that a local filesystem path is opened as a ``LocalFileOpener``.

    The dataset fixture is written to a netCDF file under ``tmp_path`` and the
    file's POSIX-style path is handed to ``_fsspec_openfile_from_filepath``.
    """
    f = tmp_path / "dataset.nc"
    dataset.to_netcdf(f)

    result = _fsspec_openfile_from_filepath(filepath=f.as_posix())
    # Use the returned handle as a context manager so it is closed when the
    # check is done instead of being left open for the garbage collector.
    with result:
        assert isinstance(result, fsspec.implementations.local.LocalFileOpener)


def test_fsspec_openfile_memory(dataset: xr.Dataset):
    """Check that a ``memory://`` URL is opened as a ``MemoryFile``.

    The dataset fixture is first written into fsspec's "memory" filesystem
    with the h5netcdf engine, then reopened through
    ``_fsspec_openfile_from_filepath``.
    """
    memory_fs = fsspec.filesystem("memory")
    # Suppress "Exception ignored in: <function netcdf_file.close at ...>"
    with contextlib.redirect_stderr(None), memory_fs.open(
        "dataset.nc", mode="wb"
    ) as sink:
        dataset.to_netcdf(sink, engine="h5netcdf")

    opened = _fsspec_openfile_from_filepath(filepath="memory://dataset.nc")
    with opened:
        assert isinstance(opened, fsspec.implementations.memory.MemoryFile)
48 changes: 21 additions & 27 deletions virtualizarr/utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional
import io
from typing import TYPE_CHECKING, Optional, Union

if TYPE_CHECKING:
from fsspec.implementations.local import LocalFileOpener
from s3fs.core import S3File
import fsspec.core
import fsspec.spec

# See pangeo_forge_recipes.storage
OpenFileType = Union[
fsspec.core.OpenFile, fsspec.spec.AbstractBufferedFile, io.IOBase
]


def _fsspec_openfile_from_filepath(
*,
filepath: str,
reader_options: Optional[dict] = {
"storage_options": {"key": "", "secret": "", "anon": True}
},
) -> S3File | LocalFileOpener:
reader_options: Optional[dict] = {},
) -> OpenFileType:
"""Converts input filepath to fsspec openfile object.

Parameters
Expand All @@ -25,8 +29,8 @@ def _fsspec_openfile_from_filepath(

Returns
-------
S3File | LocalFileOpener
Either S3File or LocalFileOpener, depending on which protocol was supplied.
OpenFileType
An open file-like object, specific to the protocol supplied in filepath.

Raises
------
Expand All @@ -40,25 +44,15 @@ def _fsspec_openfile_from_filepath(
universal_filepath = UPath(filepath)
protocol = universal_filepath.protocol

if protocol == "":
fpath = fsspec.open(filepath, "rb").open()

elif protocol in ["s3"]:
s3_anon_defaults = {"key": "", "secret": "", "anon": True}
if not bool(reader_options):
storage_options = s3_anon_defaults

else:
storage_options = reader_options.get("storage_options") # type: ignore

# using dict merge operator to add in defaults if keys are not specified
storage_options = s3_anon_defaults | storage_options
if protocol == "s3":
protocol_defaults = {"key": "", "secret": "", "anon": True}
else:
protocol_defaults = {}

fpath = fsspec.filesystem(protocol, **storage_options).open(filepath)
storage_options = reader_options.get("storage_options", {}) # type: ignore

else:
raise NotImplementedError(
"Only local and s3 file protocols are currently supported"
)
# using dict merge operator to add in defaults if keys are not specified
storage_options = protocol_defaults | storage_options
fpath = fsspec.filesystem(protocol, **storage_options).open(filepath)

return fpath
1 change: 1 addition & 0 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def open_virtual_dataset(
vds_refs = kerchunk.read_kerchunk_references_from_file(
filepath=filepath,
filetype=filetype,
reader_options=reader_options,
)
virtual_vars = virtual_vars_from_kerchunk_refs(
vds_refs,
Expand Down
Loading