Add Google Cloud Storage integration
Via Google's google-cloud-storage library
siboehm authored and fmarczin committed Feb 23, 2021
1 parent 8b59833 commit dd5a327
Showing 12 changed files with 438 additions and 24 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -34,6 +34,7 @@ install: pip install tox coveralls
before_script:
- bash .travis/start_minio.sh
- docker run -p 10000:10000 mcr.microsoft.com/azure-storage/azurite azurite-blob --blobHost 0.0.0.0 &
- docker run -d --name fake-gcs-server -p 4443:4443 fsouza/fake-gcs-server -scheme http
- psql -c 'create database simplekv_test;' -U postgres
- psql -c 'ALTER ROLE travis CONNECTION LIMIT -1;' -U postgres
- mysql -e 'create database simplekv_test;'
5 changes: 5 additions & 0 deletions docs/changes.rst
@@ -1,6 +1,11 @@
Changelog
*********

0.15.0
======

* Add support for Google Cloud Storage through ``google-cloud-storage`` (Python 3 only).

0.14.1
======

53 changes: 53 additions & 0 deletions docs/gcstorage.rst
@@ -0,0 +1,53 @@
Google Cloud Storage
********************

This backend is for storing data in `Google Cloud Storage <https://cloud.google.com/storage>`_
by using the ``google-cloud-storage`` library.

``google-cloud-storage`` is only available for Python 3. Simplekv also provides access to
Google Cloud Storage through :class:`~simplekv.net.boto.BotoStore` using the ``boto`` library, which is available for Python 2.


Note that ``google-cloud-storage`` is not a dependency of simplekv. You need to install it
yourself; otherwise you will see an :exc:`~exceptions.ImportError`.
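
For example, it can be installed from PyPI::

    pip install google-cloud-storage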

Here is a short example:

::

from simplekv.net.gcstore import GoogleCloudStore

credentials_path = "/path/to/credentials.json"

store = GoogleCloudStore(credentials=credentials_path, bucket_name="test_bucket")

# store some data in the store
store.put("first-key", b"Hello Google Cloud!")

# print out what's stored behind first-key. You should now see
# the key in the bucket as well
print(store.get("first-key"))


Testing
=======

The tests for the Google Cloud Storage backend either

* use a real Google Cloud Storage account, or
* use the `Fake GCS Server <https://github.com/fsouza/fake-gcs-server>`_ storage emulator.

The Travis tests use the second method. Caution: some methods (deleting buckets, copying blobs, ...) aren't implemented
in the emulator and should therefore be tested using a real Cloud Storage account.

To test with a real blob store account, edit the file ``google_cloud_credentials.ini``
such that the first config section contains the path to the ``credentials.json`` of your test account.

To test against a locally running Fake GCS Server instance, make sure to start the Docker container::

docker run -d --name fake-gcs-server -p 4443:4443 fsouza/fake-gcs-server -scheme http

before running the tests.
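
As a sketch of how a test can then target the emulator (this assumes the installed
``google-cloud-storage`` honors the ``STORAGE_EMULATOR_HOST`` environment variable;
the endpoint matches the ``docker run`` command above)::

    import os

    os.environ["STORAGE_EMULATOR_HOST"] = "http://localhost:4443"

    from google.auth.credentials import AnonymousCredentials
    from simplekv.net.gcstore import GoogleCloudStore

    # AnonymousCredentials avoids the need for a real service account
    store = GoogleCloudStore(
        credentials=AnonymousCredentials(), bucket_name="test_bucket", project="test"
    )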

.. autoclass:: simplekv.net.gcstore.GoogleCloudStore
:members: __init__
1 change: 1 addition & 0 deletions docs/index.rst
@@ -76,6 +76,7 @@ Table of contents
filesystem
boto
azure
gcstorage
gae
memory
db
3 changes: 3 additions & 0 deletions google_cloud_credentials.ini
@@ -0,0 +1,3 @@
[google-cloud-tests]
emulator_endpoint=http://localhost:4443
credentials_json_path=
19 changes: 0 additions & 19 deletions simplekv/net/_azurestore_common.py
@@ -5,25 +5,6 @@
import hashlib
import base64

LAZY_PROPERTY_ATTR_PREFIX = "_lazy_"


def lazy_property(fn):
"""Decorator that makes a property lazy-evaluated.
On first access, lazy properties are computed and saved
as instance attribute with the name `'_lazy_' + method_name`
Any subsequent property access then returns the cached value."""
attr_name = LAZY_PROPERTY_ATTR_PREFIX + fn.__name__

@property
def _lazy_property(self):
if not hasattr(self, attr_name):
setattr(self, attr_name, fn(self))
return getattr(self, attr_name)

return _lazy_property


def _file_md5(file_, b64encode=True):
"""
4 changes: 1 addition & 3 deletions simplekv/net/_azurestore_new.py
@@ -10,10 +10,8 @@
from ._azurestore_common import (
_byte_buffer_md5,
_file_md5,
lazy_property,
LAZY_PROPERTY_ATTR_PREFIX,
)

from ._net_common import lazy_property, LAZY_PROPERTY_ATTR_PREFIX

if PY2:

8 changes: 6 additions & 2 deletions simplekv/net/_azurestore_old.py
@@ -4,8 +4,12 @@
import io
from contextlib import contextmanager

from ._azurestore_common import _byte_buffer_md5, _file_md5, _filename_md5,\
lazy_property, LAZY_PROPERTY_ATTR_PREFIX
from ._azurestore_common import (
_byte_buffer_md5,
_file_md5,
_filename_md5,
)
from ._net_common import lazy_property, LAZY_PROPERTY_ATTR_PREFIX

from .._compat import binary_type
from .. import KeyValueStore
18 changes: 18 additions & 0 deletions simplekv/net/_net_common.py
@@ -0,0 +1,18 @@
LAZY_PROPERTY_ATTR_PREFIX = "_lazy_"


def lazy_property(fn):
"""Decorator that makes a property lazy-evaluated.
On first access, lazy properties are computed and saved
as instance attribute with the name `'_lazy_' + method_name`
Any subsequent property access then returns the cached value."""
attr_name = LAZY_PROPERTY_ATTR_PREFIX + fn.__name__

@property
def _lazy_property(self):
if not hasattr(self, attr_name):
setattr(self, attr_name, fn(self))
return getattr(self, attr_name)

return _lazy_property
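
A minimal usage sketch of the decorator (the class below is hypothetical, not part of the commit):

    class Example:
        @lazy_property
        def _answer(self):
            print("computed once")
            return 42

    e = Example()
    e._answer  # prints "computed once" and caches the value as e._lazy__answer
    e._answer  # returns the cached 42 without recomputing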
188 changes: 188 additions & 0 deletions simplekv/net/gcstore.py
@@ -0,0 +1,188 @@
import io
from typing import Union
from ._net_common import lazy_property, LAZY_PROPERTY_ATTR_PREFIX
from .. import KeyValueStore
from google.auth.credentials import Credentials
from google.cloud.storage import Client, Blob
from google.cloud.exceptions import NotFound, GoogleCloudError


class GoogleCloudStore(KeyValueStore):
def __init__(
self,
credentials: Union[str, Credentials],
bucket_name: str,
create_if_missing=True,
create_bucket_location="EUROPE-WEST3",
project=None,
):
"""A store using `Google Cloud storage <https://cloud.google.com/storage>`_ as a backend.
:param: credentials: Either the path to a `credentials JSON file
<https://cloud.google.com/docs/authentication/production>`_
or an instance of *google.auth.credentials.Credentials*.
:param bucket_name: Name of the bucket the blobs will be stored in.
Needs to follow the `naming conventions
<https://cloud.google.com/storage/docs/naming-buckets>`_.
:param create_if_missing: Creates the bucket if it doesn't exist yet.
The bucket's ACL will be the default `ACL
<https://cloud.google.com/storage/docs/access-control/lists#default>`_.
:param create_bucket_location: Location to create the bucket in,
if the bucket doesn't exist yet. One of `Bucket locations
<https://cloud.google.com/storage/docs/locations>`_.
:param project: Name of the project. If a credentials JSON file is passed,
the value of *project* is ignored, as it can be inferred.
"""
self._credentials = credentials
self.bucket_name = bucket_name
self.create_if_missing = create_if_missing
self.create_bucket_location = create_bucket_location
self.project_name = project

# This exists to allow the store to be pickled even though the underlying gc client
# doesn't support pickling. We make pickling work by omitting the lazy client & bucket
# attributes from __getstate__ and just (re)creating them when they're used (again).
@lazy_property
def _bucket(self):
if self.create_if_missing and not self._client.lookup_bucket(self.bucket_name):
return self._client.create_bucket(
bucket_or_name=self.bucket_name, location=self.create_bucket_location
)
else:
# will raise an error if bucket not found
return self._client.get_bucket(self.bucket_name)

@lazy_property
def _client(self):
if isinstance(self._credentials, str):
return Client.from_service_account_json(self._credentials)
else:
return Client(credentials=self._credentials, project=self.project_name)

def _delete(self, key: str):
try:
self._bucket.delete_blob(key)
except NotFound:
# simpleKV doesn't raise an error if key doesn't exist
pass

def _get(self, key: str):
blob = Blob(name=key, bucket=self._bucket)
try:
blob_bytes = blob.download_as_bytes()
except NotFound:
raise KeyError(key)
return blob_bytes

def _get_file(self, key: str, file):
blob = Blob(name=key, bucket=self._bucket)
try:
blob.download_to_file(file)
except NotFound:
raise KeyError(key)

def _has_key(self, key: str):
return Blob(key, self._bucket).exists()

def iter_keys(self, prefix=""):
return (blob.name for blob in self._bucket.list_blobs(prefix=prefix))

def _open(self, key: str):
blob = Blob(key, self._bucket)
if not blob.exists():
raise KeyError(key)
return IOInterface(blob)

def _put(self, key: str, data: bytes):
if not isinstance(data, bytes):
raise IOError(f"data has to be of type 'bytes', not {type(data)}")
blob = Blob(key, self._bucket)
blob.upload_from_string(data, content_type="application/octet-stream")
return key

def _put_file(self, key, file):
blob = Blob(key, self._bucket)
try:
blob.upload_from_file(file_obj=file)
except GoogleCloudError:
raise IOError
return key

# Skips two lazy items: bucket & client.
# These will be recreated after unpickling through the lazy_property decorator.
def __getstate__(self):
return {
key: value
for key, value in self.__dict__.items()
if not key.startswith(LAZY_PROPERTY_ATTR_PREFIX)
}
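
# Hypothetical illustration (not part of the module): because the lazy attributes
# are dropped from the pickled state, a round-trip works even though the underlying
# google-cloud client itself is not picklable:
#
#   import pickle
#   store = GoogleCloudStore(credentials="creds.json", bucket_name="bucket")
#   restored = pickle.loads(pickle.dumps(store))
#   # restored's client & bucket are recreated lazily on first access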


class IOInterface(io.BufferedIOBase):
"""
Class which provides a file-like interface to selectively read from a blob in the bucket.
"""

def __init__(self, blob: Blob):
super(IOInterface, self).__init__()
self.blob = blob

if blob.size is None:
blob.reload()
self.size = blob.size
self.pos = 0

def tell(self):
"""Returns he current offset as int. Always >= 0."""
if self.closed:
raise ValueError("I/O operation on closed file")
return self.pos

def read(self, size=-1):
"""Returns 'size' amount of bytes or less if there is no more data.
If no size is given all data is returned. size can be >= 0."""
if self.closed:
raise ValueError("I/O operation on closed file")
max_size = max(0, self.size - self.pos)
if size < 0 or size > max_size:
size = max_size
if size == 0:
return b""
blob_bytes = self.blob.download_as_bytes(
start=self.pos, end=self.pos + size - 1
)
self.pos += len(blob_bytes)
return blob_bytes

def seek(self, offset, whence=0):
"""Move to a new offset either relative or absolute. whence=0 is
absolute, whence=1 is relative, whence=2 is relative to the end.
Any relative or absolute seek operation which would result in a
negative position is undefined and that case can be ignored
in the implementation.
Any seek operation which moves the position after the stream
should succeed. tell() should report that position and read()
should return an empty bytes object."""
if self.closed:
raise ValueError("I/O operation on closed file")
if whence == 0:
if offset < 0:
raise IOError("seek would move position outside the file")
self.pos = offset
elif whence == 1:
if self.pos + offset < 0:
raise IOError("seek would move position outside the file")
self.pos += offset
elif whence == 2:
if self.size + offset < 0:
raise IOError("seek would move position outside the file")
self.pos = self.size + offset
else:
raise ValueError(f"invalid whence: {whence}")
return self.pos

def seekable(self):
return True

def readable(self):
return True
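
The file-like object above is what ``store.open(key)`` is expected to return, so a
ranged read can be sketched as follows (assuming the ``first-key`` blob from the
earlier example exists):

    f = store.open("first-key")   # blob contains b"Hello Google Cloud!"
    f.seek(6)                     # skip past "Hello "
    print(f.read(6))              # b'Google' - only this byte range is downloaded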