Skip to content

Commit

Permalink
Merge pull request #76 from simonbohnen/s3-store-creation
Browse files Browse the repository at this point in the history
Refactor S3 store creation
  • Loading branch information
xhochy authored Feb 8, 2023
2 parents 20a5163 + e1ad1c7 commit e5ab466
Show file tree
Hide file tree
Showing 17 changed files with 461 additions and 49 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,7 @@ Pipfile.lock

# minimalkv
store/
old/

# Exploratory code
exp.py
exploration_scripts/
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ repos:
- types-setuptools
- types-redis
- types-boto
- boto3-stubs
- repo: https://github.com/Quantco/pre-commit-mirrors-pyupgrade
rev: 3.1.0
hooks:
Expand Down
7 changes: 6 additions & 1 deletion docs/changes.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
Changelog
*********

1.6.1
1.7.0
=====
* Deprecated ``get_store``, ``url2dict``, and ``extract_params``.

* ``get_store_from_url`` should be used to create stores from a URL

* Added ``from_url`` and ``from_parsed_url`` to each store.

* Made the SQLAlchemyStore compatible with SQLAlchemy 2.0.

Expand Down
22 changes: 15 additions & 7 deletions minimalkv/_boto.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,21 @@ def _get_s3bucket(
from boto.s3.connection import S3ResponseError # type: ignore
from boto.s3.connection import OrdinaryCallingFormat, S3Connection

s3con = S3Connection(
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
host=host,
is_secure=False,
calling_format=OrdinaryCallingFormat(),
)
s3_connection_params = {
"aws_access_key_id": access_key,
"aws_secret_access_key": secret_key,
"is_secure": False,
"calling_format": OrdinaryCallingFormat(),
}

# Split up the host into host and port.
if ":" in host:
host, port = host.split(":")
s3_connection_params["port"] = int(port)
s3_connection_params["host"] = host

s3con = S3Connection(**s3_connection_params)

# add access key prefix to bucket name, unless explicitly prohibited
if force_bucket_suffix and not bucket.lower().endswith("-" + access_key.lower()):
bucket = bucket + "-" + access_key.lower()
Expand Down
109 changes: 99 additions & 10 deletions minimalkv/_get_store.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
from functools import reduce
from typing import Any
from typing import Any, Dict, List, Optional, Type

from uritools import SplitResult, urisplit

from minimalkv._key_value_store import KeyValueStore
from minimalkv._urls import url2dict


def get_store_from_url(url: str) -> KeyValueStore:
def get_store_from_url(
url: str, store_cls: Optional[Type[KeyValueStore]] = None
) -> KeyValueStore:
"""
Take a URL and return a minimalkv store according to the parameters in the URL.
Parameters
----------
url : str
Access-URL, see below for supported formats.
store_cls : Optional[Type[KeyValueStore]]
The class of the store to create.
If the URL scheme doesn't match the class, a ValueError is raised.
Returns
-------
store : KeyValueStore
Value Store as described in url.
KeyValueStore as described in url.
Notes
-----
Expand All @@ -41,18 +48,100 @@ def get_store_from_url(url: str) -> KeyValueStore:
* AzureBlockBlockStorage (SAS): ``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)]``
* AzureBlockBlockStorage (SAS): ``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]``
* GoogleCloudStorage: ``gcs://<base64 encoded credentials JSON>@bucket_name[?create_if_missing=true][&bucket_creation_location=EUROPE-WEST1]``
* S3FSStore: ``s3://access_key:secret_key@endpoint/bucket[?create_if_missing=true]``
See the respective store's :func:`_from_parsed_url` function for more details.
"""
from minimalkv._hstores import HS3FSStore
from minimalkv.net.s3fsstore import S3FSStore

scheme_to_store: Dict[str, Type[KeyValueStore]] = {
"s3": S3FSStore,
"hs3": HS3FSStore,
"boto": HS3FSStore,
}

parsed_url = urisplit(url)
# Wrappers can be used to add functionality to a store, e.g. encryption.
# See the documentation of _extract_wrappers for details.
wrappers = _extract_wrappers(parsed_url)

# Remove wrappers from scheme
scheme_parts = parsed_url.getscheme().split("+")
# pop off the type of the store
scheme = scheme_parts[0]

if scheme not in scheme_to_store:
# If we can't find the scheme, we fall back to the old creation methods
return get_store(**url2dict(url))

store_cls_from_url = scheme_to_store[scheme]
if store_cls is not None and store_cls_from_url != store_cls:
raise ValueError(
f"URL scheme {scheme} does not match store class {store_cls.__name__}"
)

query_listdict: Dict[str, List[str]] = parsed_url.getquerydict()
# We will just use the last occurrence for each key
query = {k: v[-1] for k, v in query_listdict.items()}

store = store_cls_from_url._from_parsed_url(parsed_url, query)

# apply wrappers/decorators:
from minimalkv._store_decoration import decorate_store

wrapped_store = reduce(decorate_store, wrappers, store)

return wrapped_store


def _extract_wrappers(parsed_url: SplitResult) -> List[str]:
    """
    Collect the names of the store wrappers requested by a parsed URL.

    Wrappers add functionality on top of a store, e.g. encryption.
    A URL can request them in one of two ways:

    1. In the fragment, e.g. "s3://...#wrap:readonly+urlencode"
    2. In the scheme, e.g. "s3+readonly+urlencode://..."

    Using both ways in a single URL is rejected.

    Parameters
    ----------
    parsed_url: SplitResult
        The parsed URL.

    Returns
    -------
    wrappers: List[str]
        The wrapper names. Names taken from the scheme are returned in reverse
        order of appearance; names taken from the fragment keep their order.

    Raises
    ------
    ValueError
        If wrappers are given both in the scheme and in the fragment.
    """
    # Scheme style ("s3+readonly+urlencode://..."): the first part is the
    # store type itself, everything after it is a wrapper name.
    _store_type, *scheme_names = parsed_url.getscheme().split("+")
    scheme_wrappers = scheme_names[::-1]

    # Fragment style ("s3://...#wrap:readonly+urlencode"): only pieces that
    # start with "wrap:" count, and the last such piece wins.
    fragment = parsed_url.getfragment()
    fragment_wrappers: List[str] = []
    if fragment:
        wrap_specs = [
            piece for piece in fragment.split("#") if piece.startswith("wrap:")
        ]
        if wrap_specs:
            _, _, names = wrap_specs[-1].partition("wrap:")
            fragment_wrappers = names.split("+")

    # The two styles are mutually exclusive.
    if scheme_wrappers and fragment_wrappers:
        raise ValueError(
            "Adding store wrappers via both the scheme and the fragment is not allowed."
        )

    return scheme_wrappers + fragment_wrappers


def get_store(
Expand Down
10 changes: 10 additions & 0 deletions minimalkv/_hstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from minimalkv.memory import DictStore
from minimalkv.memory.redisstore import RedisStore
from minimalkv.net.azurestore import AzureBlockBlobStore
from minimalkv.net.boto3store import Boto3Store
from minimalkv.net.botostore import BotoStore
from minimalkv.net.gcstore import GoogleCloudStore
from minimalkv.net.s3fsstore import S3FSStore


class HDictStore(ExtendedKeyspaceMixin, DictStore): # noqa D
Expand Down Expand Up @@ -39,6 +41,14 @@ def size(self, key: str) -> bytes:
return k.size


# S3FSStore combined with ExtendedKeyspaceMixin; the class adds no members of
# its own.
# NOTE(review): presumably the mixin widens the set of accepted keys - confirm
# against the definition of ExtendedKeyspaceMixin.
class HS3FSStore(ExtendedKeyspaceMixin, S3FSStore):  # noqa D
    pass


# Boto3Store combined with ExtendedKeyspaceMixin; the class adds no members of
# its own.
# NOTE(review): presumably the mixin widens the set of accepted keys - confirm
# against the definition of ExtendedKeyspaceMixin.
class HBoto3Store(ExtendedKeyspaceMixin, Boto3Store):  # noqa D
    pass


class HGoogleCloudStore(ExtendedKeyspaceMixin, GoogleCloudStore): # noqa D
pass

Expand Down
29 changes: 27 additions & 2 deletions minimalkv/_key_value_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from io import BytesIO
from types import TracebackType
from typing import IO, Iterable, Iterator, List, Optional, Type, Union
from typing import IO, Dict, Iterable, Iterator, List, Optional, Type, Union

from uritools import SplitResult

from minimalkv._constants import VALID_KEY_RE
from minimalkv._mixins import UrlMixin
Expand Down Expand Up @@ -98,7 +100,7 @@ def get_file(self, key: str, file: Union[str, IO]) -> str:
implement a specialized function if data needs to be written to disk or streamed.
If ``file`` is a string, contents of ``key`` are written to a newly created file
with the filename ``file``. Otherwise the data will be written using the
with the filename ``file``. Otherwise, the data will be written using the
``write`` method of ``file``.
Parameters
Expand Down Expand Up @@ -462,6 +464,29 @@ def __exit__(
"""
self.close()

@classmethod
def _from_parsed_url(
    cls, parsed_url: SplitResult, query: Dict[str, str]
) -> "KeyValueStore":
    """
    Build a ``KeyValueStore`` from a parsed URL.

    To build a ``KeyValueStore`` from a URL, use :func:`get_store_from_url`.

    This base implementation only raises ``NotImplementedError``.
    NOTE(review): store classes that can be created from a URL are expected
    to override this method - confirm for each store.

    Parameters
    ----------
    parsed_url: SplitResult
        The parsed URL.
    query: Dict[str, str]
        Query parameters from the URL.

    Returns
    -------
    store : KeyValueStore
        The created KeyValueStore.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    raise NotImplementedError


class UrlKeyValueStore(UrlMixin, KeyValueStore):
"""Class is deprecated. Use the :class:`.UrlMixin` instead.
Expand Down
9 changes: 9 additions & 0 deletions minimalkv/_store_creation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import os.path
from typing import TYPE_CHECKING, Any, Dict
from warnings import warn

from minimalkv.fs import FilesystemStore

Expand All @@ -10,6 +11,14 @@

def create_store(type: str, params: Dict[str, Any]) -> "KeyValueStore":
"""Create store of type ``type`` with ``params``."""
warn(
"""
create_store will be removed in the next major release.
If you want to create a KeyValueStore from a URL, use get_store_from_url.
""",
DeprecationWarning,
stacklevel=2,
)
# TODO: More detailed docstring
if type in ("azure", "hazure"):
return _create_store_azure(type, params)
Expand Down
17 changes: 17 additions & 0 deletions minimalkv/_url_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Optional

from uritools import SplitResult


def _get_username(split_result: SplitResult) -> Optional[str]:
    """Return the user name from the userinfo part of a parsed URL.

    The user name is everything in front of the first ``:`` of the userinfo.
    ``None`` is returned when the URL has no userinfo or an empty one.
    """
    credentials = split_result.getuserinfo()
    if credentials:
        name, _, _ = credentials.partition(":")
        return name
    return None


def _get_password(split_result: SplitResult) -> Optional[str]:
    """Return the password from the userinfo part of a parsed URL.

    The password is everything after the *first* ``:`` of the userinfo, so a
    password that itself contains ``:`` is returned in full.

    Parameters
    ----------
    split_result : SplitResult
        The parsed URL.

    Returns
    -------
    password : Optional[str]
        The password, or ``None`` when the URL has no userinfo or the userinfo
        contains no ``:`` separator. An empty password (``"user:"``) is
        returned as ``""``.
    """
    userinfo = split_result.getuserinfo()
    if not userinfo:
        return None
    # Split at the first ":" only; splitting at every ":" would cut the
    # password short at its own first colon.
    _, separator, password = userinfo.partition(":")
    return password if separator else None
31 changes: 28 additions & 3 deletions minimalkv/_urls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
from typing import Any, Dict, List
from warnings import warn

from uritools import urisplit

Expand Down Expand Up @@ -32,8 +33,16 @@ def url2dict(url: str, raise_on_extra_params: bool = False) -> Dict[str, Any]:
``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)]``
``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]``
``gcs://<base64 encoded credentialsJSON>@bucket_name[?create_if_missing=true][?bucket_creation_location=EUROPE-WEST1]``
"""
warn(
"""
url2dict will be removed in the next major release.
If you want to create a KeyValueStore from a URL, use get_store_from_url.
""",
DeprecationWarning,
stacklevel=2,
)

u = urisplit(url)
parsed = dict(
scheme=u.getscheme(),
Expand Down Expand Up @@ -68,6 +77,15 @@ def url2dict(url: str, raise_on_extra_params: bool = False) -> Dict[str, Any]:


def extract_params(scheme, host, port, path, query, userinfo): # noqa D
warn(
"""
extract_params will be removed in the next major release.
If you want to create a KeyValueStore from a URL, use get_store_from_url.
""",
DeprecationWarning,
stacklevel=2,
)

# We want to ignore wrappers here
store_type = scheme.split("+")[0]

Expand Down Expand Up @@ -126,7 +144,7 @@ def extract_params(scheme, host, port, path, query, userinfo): # noqa D


def _parse_userinfo(userinfo: str) -> List[str]:
"""Try to split the URL's userinfo into fields separated by :.
"""Try to split the URL's userinfo into fields separated by `:`.
The user info is the part between ``://`` and ``@``. If anything looks wrong, remind
the user to percent-encode values.
Expand All @@ -140,8 +158,15 @@ def _parse_userinfo(userinfo: str) -> List[str]:
-------
parts: list of str
URL-encoded user-info split at ``:``.
"""
warn(
"""
_parse_userinfo will be removed in the next major release.
If you want to create a KeyValueStore from a URL, use get_store_from_url.
""",
DeprecationWarning,
stacklevel=2,
)
if hasattr(userinfo, "split"):
parts = userinfo.split(":", 1)

Expand Down
Loading

0 comments on commit e5ab466

Please sign in to comment.