From 134ae8011fce8d698bdf6d6c1807a75f109bb931 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 12:46:30 -0700 Subject: [PATCH 1/8] Adding bare-minimum proto for converting legacy App Engine "Reference" pbs. --- .../google/cloud/datastore/_onestore_v3.proto | 16 ++ .../cloud/datastore/_onestore_v3_pb2.py | 184 ++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 datastore/google/cloud/datastore/_onestore_v3.proto create mode 100644 datastore/google/cloud/datastore/_onestore_v3_pb2.py diff --git a/datastore/google/cloud/datastore/_onestore_v3.proto b/datastore/google/cloud/datastore/_onestore_v3.proto new file mode 100644 index 000000000000..4d7501c78a8a --- /dev/null +++ b/datastore/google/cloud/datastore/_onestore_v3.proto @@ -0,0 +1,16 @@ +syntax = "proto2"; + +message Reference { + required string app = 13; + optional string name_space = 20; + required Path path = 14; + optional string database_id = 23; +} + +message Path { + repeated group Element = 1 { + required string type = 2; + optional int64 id = 3; + optional string name = 4; + } +} diff --git a/datastore/google/cloud/datastore/_onestore_v3_pb2.py b/datastore/google/cloud/datastore/_onestore_v3_pb2.py new file mode 100644 index 000000000000..e9861d17fd40 --- /dev/null +++ b/datastore/google/cloud/datastore/_onestore_v3_pb2.py @@ -0,0 +1,184 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: _onestore_v3.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='_onestore_v3.proto', + package='', + syntax='proto2', + serialized_pb=_b('\n\x12_onestore_v3.proto\"V\n\tReference\x12\x0b\n\x03\x61pp\x18\r \x02(\t\x12\x12\n\nname_space\x18\x14 \x01(\t\x12\x13\n\x04path\x18\x0e \x02(\x0b\x32\x05.Path\x12\x13\n\x0b\x64\x61tabase_id\x18\x17 \x01(\t\"Y\n\x04Path\x12\x1e\n\x07\x65lement\x18\x01 \x03(\n2\r.Path.Element\x1a\x31\n\x07\x45lement\x12\x0c\n\x04type\x18\x02 \x02(\t\x12\n\n\x02id\x18\x03 \x01(\x03\x12\x0c\n\x04name\x18\x04 \x01(\t') +) +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + + + + +_REFERENCE = _descriptor.Descriptor( + name='Reference', + full_name='Reference', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='app', full_name='Reference.app', index=0, + number=13, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='name_space', full_name='Reference.name_space', index=1, + number=20, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='path', full_name='Reference.path', index=2, + number=14, type=11, cpp_type=10, label=2, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='database_id', full_name='Reference.database_id', index=3, + number=23, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=22, + serialized_end=108, +) + + +_PATH_ELEMENT = _descriptor.Descriptor( + name='Element', + full_name='Path.Element', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='type', full_name='Path.Element.type', index=0, + number=2, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='id', full_name='Path.Element.id', index=1, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='name', full_name='Path.Element.name', index=2, + number=4, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=150, + serialized_end=199, +) + +_PATH = _descriptor.Descriptor( + name='Path', + full_name='Path', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='element', full_name='Path.element', index=0, + number=1, type=10, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_PATH_ELEMENT, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=110, + serialized_end=199, +) + +_REFERENCE.fields_by_name['path'].message_type = _PATH +_PATH_ELEMENT.containing_type = _PATH +_PATH.fields_by_name['element'].message_type = _PATH_ELEMENT +DESCRIPTOR.message_types_by_name['Reference'] = _REFERENCE +DESCRIPTOR.message_types_by_name['Path'] = _PATH + +Reference = _reflection.GeneratedProtocolMessageType('Reference', (_message.Message,), dict( + DESCRIPTOR = _REFERENCE, + __module__ = '_onestore_v3_pb2' + # @@protoc_insertion_point(class_scope:Reference) + )) +_sym_db.RegisterMessage(Reference) + +Path = _reflection.GeneratedProtocolMessageType('Path', (_message.Message,), dict( + + Element = _reflection.GeneratedProtocolMessageType('Element', (_message.Message,), dict( + DESCRIPTOR = _PATH_ELEMENT, + __module__ = '_onestore_v3_pb2' + # @@protoc_insertion_point(class_scope:Path.Element) + )) + , + DESCRIPTOR = _PATH, + __module__ = '_onestore_v3_pb2' + # @@protoc_insertion_point(class_scope:Path) + )) +_sym_db.RegisterMessage(Path) +_sym_db.RegisterMessage(Path.Element) + + +# @@protoc_insertion_point(module_scope) From 72e26b7787cf135f76f2fb033ffe430bf8f91c37 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 13:45:23 -0700 Subject: [PATCH 2/8] Rough draft of working implementation of datastore Key.(to|from)_legacy_urlsafe. Needs more tests but wanted to get the PR in front of reviewers ASAP. --- datastore/google/cloud/datastore/key.py | 210 +++++++++++++++++++++++- datastore/tests/unit/test_key.py | 40 +++++ 2 files changed, 249 insertions(+), 1 deletion(-) diff --git a/datastore/google/cloud/datastore/key.py b/datastore/google/cloud/datastore/key.py index 5ae08c5642ca..fefc7dd49448 100644 --- a/datastore/google/cloud/datastore/key.py +++ b/datastore/google/cloud/datastore/key.py @@ -14,11 +14,28 @@ """Create / interact with Google Cloud Datastore keys.""" +import base64 import copy import six from google.cloud.proto.datastore.v1 import entity_pb2 as _entity_pb2 +from google.cloud._helpers import _to_bytes +from google.cloud.datastore import _onestore_v3_pb2 + + +_DATABASE_ID_TEMPLATE = ( + 'Received non-empty database ID: {!r}.\n' + 'urlsafe strings are not expected to encode a Reference that ' + 'contains a database ID.') +_BAD_ELEMENT_TEMPLATE = ( + 'At most one of ID and name can be set on an element. Received ' + 'id = {!r} and name = {!r}.') +_EMPTY_ELEMENT = ( + 'Exactly one of ID and name must be set on an element. ' + 'Encountered an element with neither set that was not the last ' + 'element of a path.') + class Key(object): """An immutable representation of a datastore Key. @@ -79,7 +96,7 @@ class Key(object): * namespace (string): A namespace identifier for the key. * project (string): The project associated with the key. - * parent (:class:`google.cloud.datastore.key.Key`): The parent of the key. + * parent (:class:`~google.cloud.datastore.key.Key`): The parent of the key. The project argument is required unless it has been set implicitly. """ @@ -281,6 +298,47 @@ def to_protobuf(self): return key + def to_legacy_urlsafe(self): + """Convert to a base64 encode urlsafe string for App Engine. + + This is intended to work with the "legacy" representation of a datastore + "Key" used within Google App Engine (a so-called "Reference"). This is + intended as a drop in for the value returned when using + ``ndb.Key(...).urlsafe()``. + + :rtype: bytes + :returns: ASCII bytes contain the key encoded as URL-safe base64. + """ + pass + + @classmethod + def from_legacy_urlsafe(cls, urlsafe): + """Convert urlsafe string to :class:`~google.cloud.datastore.key.Key`. + + This is intended to work with the "legacy" representation of a datastore + "Key" used within Google App Engine (a so-called "Reference"). This + assumes that ``urlsafe`` was created within an App Engine app via + something like ``ndb.Key(...).urlsafe()``. + + :type urlsafe: bytes or unicode + :param urlsafe: The base64 encoded (ASCII) string corresponding to a + datastore "Key" / "Reference". + + :rtype: :class:`~google.cloud.datastore.key.Key`. + :returns: The key corresponding to ``urlsafe``. + """ + urlsafe = _to_bytes(urlsafe, encoding='ascii') + raw_bytes = _urlsafe_b64decode(urlsafe) + + reference = _onestore_v3_pb2.Reference() + reference.ParseFromString(raw_bytes) + + project = _clean_app(reference.app) + namespace = _get_empty(reference.name_space, u'') + _check_database_id(reference.database_id) + flat_path = _get_flat_path(reference.path) + return cls(*flat_path, project=project, namespace=namespace) + @property def is_partial(self): """Boolean indicating if the key has an ID (or name). @@ -427,3 +485,153 @@ def _validate_project(project, parent): raise ValueError("A Key must have a project set.") return project + + +def _urlsafe_b64decode(urlsafe): + """Replacement for base64.urlsafe_b64decode. + + Borrowed from ``ndb.key._DecodeUrlSafe``, noting their comment: + + This is 3-4x faster than urlsafe_b64decode() + + :type urlsafe: bytes + :param urlsafe: The encoded ASCII string to be decoded + + :rtype: bytes + :returns: The value that was decoded. + """ + # This is 3-4x faster than urlsafe_b64decode() + return base64.b64decode( + urlsafe.replace(b'-', b'+').replace(b'_', b'/')) + + +def _urlsafe_b64encode(value): + """Replacement for ``base64.urlsafe_b64encode``. + + Borrowed from ``ndb.key.Key.urlsafe``, noting their comment: + + This is 3-4x faster than urlsafe_b64decode() + + (They meant "encode".) + + :type value: bytes + :param value: The value to be encoded. + + :rtype: bytes + :returns: The encoded ASCII string. + """ + urlsafe = base64.b64encode(value) + return urlsafe.rstrip(b'=').replace(b'+', b'-').replace(b'/', b'_') + + +def _clean_app(app_str): + """Clean a legacy (i.e. from App Engine) app string. + + :type app_str: str + :param app_str: The ``app`` value stored in a "Reference" pb. + + :rtype: str + :returns: The cleaned value. + """ + if app_str.startswith('s~') or app_str.startswith('e~'): + return app_str[2:] + elif app_str.startswith('dev~'): + return app_str[4:] + else: + return app_str + + +def _get_empty(value, empty_value): + """Check if a protobuf field is "empty". + + :type value: object + :param value: A basic field from a protobuf. + + :type empty_value: object + :param empty_value: The "empty" value for the same type as + ``value``. + """ + if value == empty_value: + return None + else: + return value + + +def _check_database_id(database_id): + """Make sure a "Reference" database ID is empty. + + :type database_id: unicode + :param database_id: The ``database_id`` field from a "Reference" protobuf. + + :raises: :exc:`ValueError` if the ``database_id`` is not empty. + """ + if database_id != u'': + msg = _DATABASE_ID_TEMPLATE.format(database_id) + raise ValueError(msg) + + +def _add_id_or_name(flat_path, element_pb, empty_allowed): + """Add the ID or name from an element to a list. + + :type flat_path: list + :param flat_path: List of accumulated path parts. + + :type element_pb: :class:`._onestore_v3_pb2.Path.Element` + :param element_pb: The element containing ID or name. + + :type empty_allowed: bool + :param empty_allowed: Indicates if neither ID or name need be set. If + :data:`False`, then **exactly** one of them must be. + + :raises: :exc:`ValueError` if 0 or 2 of ID/name are set (unless + ``empty_allowed=True`` and 0 are set). + """ + id_ = element_pb.id + name = element_pb.name + # NOTE: Below 0 and the empty string are the "null" values for their + # respective types, indicating that the value is unset. + if id_ == 0: + if name == u'': + if not empty_allowed: + raise ValueError(_EMPTY_ELEMENT) + else: + flat_path.append(name) + else: + if name == u'': + flat_path.append(id_) + else: + msg = _BAD_ELEMENT_TEMPLATE.format(id_, name) + raise ValueError(msg) + + +def _get_flat_path(path_pb): + """Convert a legacy "Path" protobuf to a flat path. + + For example + + Element { + type: "parent" + id: 59 + } + Element { + type: "child" + name: "naem" + } + + would convert to ``('parent', 59, 'child', 'naem')``. + + :type path_pb: :class:`._onestore_v3_pb2.Path` + :param path_pb: Legacy protobuf "Path" object (from a "Reference"). + + :rtype: tuple + :returns: The path parts from ``path_pb``. + """ + num_elts = len(path_pb.element) + last_index = num_elts - 1 + + result = [] + for index, element in enumerate(path_pb.element): + result.append(element.type) + _add_id_or_name(result, element, index == last_index) + + return tuple(result) diff --git a/datastore/tests/unit/test_key.py b/datastore/tests/unit/test_key.py index 904338368c02..4d81d243953e 100644 --- a/datastore/tests/unit/test_key.py +++ b/datastore/tests/unit/test_key.py @@ -372,6 +372,28 @@ def test_to_protobuf_w_no_kind(self): # Unset values are False-y. self.assertEqual(pb.path[0].kind, '') + def test_from_legacy_urlsafe(self): + klass = self._get_target_class() + # NOTE: This comes directly from a running (in the dev appserver) + # App Engine app. Created via: + # + # from google.appengine.ext import ndb + # key = ndb.Key( + # 'Parent', 59, 'Child', 'Feather', + # namespace='space', app='s~sample-app') + # urlsafe = key.urlsafe() + urlsafe = ( + b'agxzfnNhbXBsZS1hcHByHgsSBlBhcmVudBg7DAsSBUNoaWxkIgdGZ' + b'WF0aGVyDKIBBXNwYWNl') + key = klass.from_legacy_urlsafe(urlsafe) + self.assertEqual(key.project, 'sample-app') + self.assertEqual(key.namespace, 'space') + self.assertEqual(key.flat_path, ('Parent', 59, 'Child', 'Feather')) + # Also make sure we didn't accidentally set the parent. + self.assertIsNone(key._parent) + self.assertIsNotNone(key.parent) + self.assertIs(key._parent, key.parent) + def test_is_partial_no_name_or_id(self): key = self._make_one('KIND', project=self._DEFAULT_PROJECT) self.assertTrue(key.is_partial) @@ -431,3 +453,21 @@ def test_parent_multiple_calls(self): self.assertEqual(parent.path, _PARENT_PATH) new_parent = key.parent self.assertIs(parent, new_parent) + + +class Test__urlsafe_b64decode(unittest.TestCase): + + @staticmethod + def _call_fut(urlsafe): + from google.cloud.datastore.key import _urlsafe_b64decode + + return _urlsafe_b64decode(urlsafe) + + +class Test__urlsafe_b64encode(unittest.TestCase): + + @staticmethod + def _call_fut(value): + from google.cloud.datastore.key import _urlsafe_b64encode + + return _urlsafe_b64encode(value) From 36d9abc008c478b8ddd17af5d6c3d5357e813fb2 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 14:16:41 -0700 Subject: [PATCH 3/8] Adding implementation for datastore Key.to_legacy_urlsafe(). Also resolved some lint issues (line too long) and restructed unit test to be able to re-use "stored" values. --- datastore/google/cloud/datastore/key.py | 54 ++++++++++++++++++++----- datastore/tests/unit/test_key.py | 43 +++++++++++++------- 2 files changed, 73 insertions(+), 24 deletions(-) diff --git a/datastore/google/cloud/datastore/key.py b/datastore/google/cloud/datastore/key.py index fefc7dd49448..a6f5fdc63d65 100644 --- a/datastore/google/cloud/datastore/key.py +++ b/datastore/google/cloud/datastore/key.py @@ -301,24 +301,30 @@ def to_protobuf(self): def to_legacy_urlsafe(self): """Convert to a base64 encode urlsafe string for App Engine. - This is intended to work with the "legacy" representation of a datastore - "Key" used within Google App Engine (a so-called "Reference"). This is - intended as a drop in for the value returned when using - ``ndb.Key(...).urlsafe()``. + This is intended to work with the "legacy" representation of a + datastore "Key" used within Google App Engine (a so-called + "Reference"). This is intended as a drop in for the value returned when + using ``ndb.Key(...).urlsafe()``. :rtype: bytes :returns: ASCII bytes contain the key encoded as URL-safe base64. """ - pass + reference = _onestore_v3_pb2.Reference( + app=self.project, + path=_to_legacy_path(self._path), # Avoid the copy. + name_space=self.namespace, + ) + raw_bytes = reference.SerializeToString() + return _urlsafe_b64encode(raw_bytes) @classmethod def from_legacy_urlsafe(cls, urlsafe): """Convert urlsafe string to :class:`~google.cloud.datastore.key.Key`. - This is intended to work with the "legacy" representation of a datastore - "Key" used within Google App Engine (a so-called "Reference"). This - assumes that ``urlsafe`` was created within an App Engine app via - something like ``ndb.Key(...).urlsafe()``. + This is intended to work with the "legacy" representation of a + datastore "Key" used within Google App Engine (a so-called + "Reference"). This assumes that ``urlsafe`` was created within an App + Engine app via something like ``ndb.Key(...).urlsafe()``. :type urlsafe: bytes or unicode :param urlsafe: The base64 encoded (ASCII) string corresponding to a @@ -635,3 +641,33 @@ def _get_flat_path(path_pb): _add_id_or_name(result, element, index == last_index) return tuple(result) + + +def _to_legacy_path(dict_path): + """Convert a tuple of ints and strings in a legacy "Path". + + .. note: + + This assumes, but does not verify, that each entry in + ``dict_path`` is valid (i.e. doesn't have more than one + key out of "name" / "id"). + + :type dict_path: lsit + :param dict_path: The "structured" path for a key, i.e. it + is a list of dictionaries, each of which has + "kind" and one of "name" / "id" as keys. + + :rtype: :class:`._onestore_v3_pb2.Path` + :returns: The legacy path corresponding to ``dict_path``. + """ + elements = [] + for part in dict_path: + element_kwargs = {'type': part['kind']} + if 'id' in part: + element_kwargs['id'] = part['id'] + elif 'name' in part: + element_kwargs['name'] = part['name'] + element = _onestore_v3_pb2.Path.Element(**element_kwargs) + elements.append(element) + + return _onestore_v3_pb2.Path(element=elements) diff --git a/datastore/tests/unit/test_key.py b/datastore/tests/unit/test_key.py index 4d81d243953e..54600f986bf5 100644 --- a/datastore/tests/unit/test_key.py +++ b/datastore/tests/unit/test_key.py @@ -18,6 +18,20 @@ class TestKey(unittest.TestCase): _DEFAULT_PROJECT = 'PROJECT' + # NOTE: This comes directly from a running (in the dev appserver) + # App Engine app. Created via: + # + # from google.appengine.ext import ndb + # key = ndb.Key( + # 'Parent', 59, 'Child', 'Feather', + # namespace='space', app='s~sample-app') + # urlsafe = key.urlsafe() + _URLSAFE_EXAMPLE = ( + b'agxzfnNhbXBsZS1hcHByHgsSBlBhcmVudBg7DAsSBUNoaWxkIgdGZ' + b'WF0aGVyDKIBBXNwYWNl') + _URLSAFE_APP = 's~sample-app' + _URLSAFE_NAMESPACE = 'space' + _URLSAFE_FLAT_PATH = ('Parent', 59, 'Child', 'Feather') @staticmethod def _get_target_class(): @@ -372,23 +386,22 @@ def test_to_protobuf_w_no_kind(self): # Unset values are False-y. self.assertEqual(pb.path[0].kind, '') + def test_to_legacy_urlsafe(self): + key = self._make_one( + *self._URLSAFE_FLAT_PATH, + project=self._URLSAFE_APP, + namespace=self._URLSAFE_NAMESPACE) + # NOTE: ``key.project`` is somewhat "invalid" but that is OK. + urlsafe = key.to_legacy_urlsafe() + self.assertEqual(urlsafe, self._URLSAFE_EXAMPLE) + def test_from_legacy_urlsafe(self): klass = self._get_target_class() - # NOTE: This comes directly from a running (in the dev appserver) - # App Engine app. Created via: - # - # from google.appengine.ext import ndb - # key = ndb.Key( - # 'Parent', 59, 'Child', 'Feather', - # namespace='space', app='s~sample-app') - # urlsafe = key.urlsafe() - urlsafe = ( - b'agxzfnNhbXBsZS1hcHByHgsSBlBhcmVudBg7DAsSBUNoaWxkIgdGZ' - b'WF0aGVyDKIBBXNwYWNl') - key = klass.from_legacy_urlsafe(urlsafe) - self.assertEqual(key.project, 'sample-app') - self.assertEqual(key.namespace, 'space') - self.assertEqual(key.flat_path, ('Parent', 59, 'Child', 'Feather')) + key = klass.from_legacy_urlsafe(self._URLSAFE_EXAMPLE) + + self.assertEqual('s~' + key.project, self._URLSAFE_APP) + self.assertEqual(key.namespace, self._URLSAFE_NAMESPACE) + self.assertEqual(key.flat_path, self._URLSAFE_FLAT_PATH) # Also make sure we didn't accidentally set the parent. self.assertIsNone(key._parent) self.assertIsNotNone(key.parent) From 92779fd3ceba4e70ec2041f2ceb40f1e953a0be1 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 14:17:49 -0700 Subject: [PATCH 4/8] Adding _onestore_v3_pb2 to ignored files for flake8. --- datastore/.flake8 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datastore/.flake8 b/datastore/.flake8 index 25168dc87605..0325d8cef0f1 100644 --- a/datastore/.flake8 +++ b/datastore/.flake8 @@ -1,5 +1,10 @@ [flake8] exclude = + # Datastore includes generated code in the manual layer; + # do not lint this. + google/cloud/datastore/_onestore_v3_pb2.py, + + # Standard linting exemptions. __pycache__, .git, *.pyc, From 22097ffac790d72e7d98ba0fe485872b881528fa Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 14:21:13 -0700 Subject: [PATCH 5/8] Addressing @jonparrott feedback. In particular: - Just splitting on ~ when cleaning app strings - Rewording to_legacy_urlsafe() docstring to invoke `ndb.Key(urlsafe=...)` and to restate the "returns" text - Removing the _urlsafe_b64(decode|encode) micro-optimizations that were brought over from the ndb codebase --- datastore/google/cloud/datastore/key.py | 55 ++++--------------------- 1 file changed, 7 insertions(+), 48 deletions(-) diff --git a/datastore/google/cloud/datastore/key.py b/datastore/google/cloud/datastore/key.py index a6f5fdc63d65..7c7e1836db91 100644 --- a/datastore/google/cloud/datastore/key.py +++ b/datastore/google/cloud/datastore/key.py @@ -303,11 +303,11 @@ def to_legacy_urlsafe(self): This is intended to work with the "legacy" representation of a datastore "Key" used within Google App Engine (a so-called - "Reference"). This is intended as a drop in for the value returned when - using ``ndb.Key(...).urlsafe()``. + "Reference"). The returned string can be used as the ``urlsafe`` + argument to ``ndb.Key(urlsafe=...)``. :rtype: bytes - :returns: ASCII bytes contain the key encoded as URL-safe base64. + :returns: A bytestring containing the key encoded as URL-safe base64. """ reference = _onestore_v3_pb2.Reference( app=self.project, @@ -315,7 +315,7 @@ def to_legacy_urlsafe(self): name_space=self.namespace, ) raw_bytes = reference.SerializeToString() - return _urlsafe_b64encode(raw_bytes) + return base64.urlsafe_b64encode(raw_bytes) @classmethod def from_legacy_urlsafe(cls, urlsafe): @@ -334,7 +334,7 @@ def from_legacy_urlsafe(cls, urlsafe): :returns: The key corresponding to ``urlsafe``. """ urlsafe = _to_bytes(urlsafe, encoding='ascii') - raw_bytes = _urlsafe_b64decode(urlsafe) + raw_bytes = base64.urlsafe_b64decode(urlsafe) reference = _onestore_v3_pb2.Reference() reference.ParseFromString(raw_bytes) @@ -493,43 +493,6 @@ def _validate_project(project, parent): return project -def _urlsafe_b64decode(urlsafe): - """Replacement for base64.urlsafe_b64decode. - - Borrowed from ``ndb.key._DecodeUrlSafe``, noting their comment: - - This is 3-4x faster than urlsafe_b64decode() - - :type urlsafe: bytes - :param urlsafe: The encoded ASCII string to be decoded - - :rtype: bytes - :returns: The value that was decoded. - """ - # This is 3-4x faster than urlsafe_b64decode() - return base64.b64decode( - urlsafe.replace(b'-', b'+').replace(b'_', b'/')) - - -def _urlsafe_b64encode(value): - """Replacement for ``base64.urlsafe_b64encode``. - - Borrowed from ``ndb.key.Key.urlsafe``, noting their comment: - - This is 3-4x faster than urlsafe_b64decode() - - (They meant "encode".) - - :type value: bytes - :param value: The value to be encoded. - - :rtype: bytes - :returns: The encoded ASCII string. - """ - urlsafe = base64.b64encode(value) - return urlsafe.rstrip(b'=').replace(b'+', b'-').replace(b'/', b'_') - - def _clean_app(app_str): """Clean a legacy (i.e. from App Engine) app string. @@ -539,12 +502,8 @@ def _clean_app(app_str): :rtype: str :returns: The cleaned value. """ - if app_str.startswith('s~') or app_str.startswith('e~'): - return app_str[2:] - elif app_str.startswith('dev~'): - return app_str[4:] - else: - return app_str + parts = app_str.split('~', 1) + return parts[-1] def _get_empty(value, empty_value): From bb87e6278837e77e1a93b5bde46a4466ed572b83 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 14:54:18 -0700 Subject: [PATCH 6/8] Adding test coverage for helpers needed for Key.(to|from)_legacy_urlsafe. --- datastore/.coveragerc | 2 + datastore/tests/unit/test_key.py | 240 +++++++++++++++++++++++++++++-- 2 files changed, 234 insertions(+), 8 deletions(-) diff --git a/datastore/.coveragerc b/datastore/.coveragerc index a54b99aa14b7..c462aa97654e 100644 --- a/datastore/.coveragerc +++ b/datastore/.coveragerc @@ -2,6 +2,8 @@ branch = True [report] +omit = + _onestore_v3_pb2.py fail_under = 100 show_missing = True exclude_lines = diff --git a/datastore/tests/unit/test_key.py b/datastore/tests/unit/test_key.py index 54600f986bf5..99bf420205f0 100644 --- a/datastore/tests/unit/test_key.py +++ b/datastore/tests/unit/test_key.py @@ -468,19 +468,243 @@ def test_parent_multiple_calls(self): self.assertIs(parent, new_parent) -class Test__urlsafe_b64decode(unittest.TestCase): +class Test__clean_app(unittest.TestCase): + + PROJECT = 'my-prahjekt' + + @staticmethod + def _call_fut(app_str): + from google.cloud.datastore.key import _clean_app + + return _clean_app(app_str) + + def test_already_clean(self): + app_str = self.PROJECT + self.assertEqual(self._call_fut(app_str), self.PROJECT) + + def test_standard(self): + app_str = 's~' + self.PROJECT + self.assertEqual(self._call_fut(app_str), self.PROJECT) + + def test_european(self): + app_str = 'e~' + self.PROJECT + self.assertEqual(self._call_fut(app_str), self.PROJECT) + + def test_dev_server(self): + app_str = 'dev~' + self.PROJECT + self.assertEqual(self._call_fut(app_str), self.PROJECT) + + +class Test__get_empty(unittest.TestCase): + + @staticmethod + def _call_fut(value, empty_value): + from google.cloud.datastore.key import _get_empty + + return _get_empty(value, empty_value) + + def test_unset(self): + for empty_value in (u'', 0, 0.0, []): + ret_val = self._call_fut(empty_value, empty_value) + self.assertIsNone(ret_val) + + def test_actually_set(self): + value_pairs = ( + (u'hello', u''), + (10, 0), + (3.14, 0.0), + (['stuff', 'here'], []), + ) + for value, empty_value in value_pairs: + ret_val = self._call_fut(value, empty_value) + self.assertIs(ret_val, value) + + +class Test__check_database_id(unittest.TestCase): + + @staticmethod + def _call_fut(database_id): + from google.cloud.datastore.key import _check_database_id + + return _check_database_id(database_id) + + def test_empty_value(self): + ret_val = self._call_fut(u'') + # Really we are just happy there was no exception. + self.assertIsNone(ret_val) + + def test_failure(self): + with self.assertRaises(ValueError): + self._call_fut(u'some-database-id') + + +class Test__add_id_or_name(unittest.TestCase): + + @staticmethod + def _call_fut(flat_path, element_pb, empty_allowed): + from google.cloud.datastore.key import _add_id_or_name + + return _add_id_or_name(flat_path, element_pb, empty_allowed) + + def test_add_id(self): + flat_path = [] + id_ = 123 + element_pb = _make_element_pb(id=id_) + + ret_val = self._call_fut(flat_path, element_pb, False) + self.assertIsNone(ret_val) + self.assertEqual(flat_path, [id_]) + ret_val = self._call_fut(flat_path, element_pb, True) + self.assertIsNone(ret_val) + self.assertEqual(flat_path, [id_, id_]) + + def test_add_name(self): + flat_path = [] + name = 'moon-shadow' + element_pb = _make_element_pb(name=name) + + ret_val = self._call_fut(flat_path, element_pb, False) + self.assertIsNone(ret_val) + self.assertEqual(flat_path, [name]) + ret_val = self._call_fut(flat_path, element_pb, True) + self.assertIsNone(ret_val) + self.assertEqual(flat_path, [name, name]) + + def test_both_present(self): + element_pb = _make_element_pb(id=17, name='seventeen') + flat_path = [] + with self.assertRaises(ValueError): + self._call_fut(flat_path, element_pb, False) + with self.assertRaises(ValueError): + self._call_fut(flat_path, element_pb, True) + + self.assertEqual(flat_path, []) + + def test_both_empty_failure(self): + element_pb = _make_element_pb() + flat_path = [] + with self.assertRaises(ValueError): + self._call_fut(flat_path, element_pb, False) + + self.assertEqual(flat_path, []) + + def test_both_empty_allowed(self): + element_pb = _make_element_pb() + flat_path = [] + ret_val = self._call_fut(flat_path, element_pb, True) + self.assertIsNone(ret_val) + self.assertEqual(flat_path, []) + + +class Test__get_flat_path(unittest.TestCase): @staticmethod - def _call_fut(urlsafe): - from google.cloud.datastore.key import _urlsafe_b64decode + def _call_fut(path_pb): + from google.cloud.datastore.key import _get_flat_path + + return _get_flat_path(path_pb) + + def test_one_pair(self): + kind = 'Widget' + name = 'Scooter' + element_pb = _make_element_pb(type=kind, name=name) + path_pb = _make_path_pb(element_pb) + flat_path = self._call_fut(path_pb) + self.assertEqual(flat_path, (kind, name)) + + def test_two_pairs(self): + kind1 = 'parent' + id1 = 59 + element_pb1 = _make_element_pb(type=kind1, id=id1) - return _urlsafe_b64decode(urlsafe) + kind2 = 'child' + name2 = 'naem' + element_pb2 = _make_element_pb(type=kind2, name=name2) + path_pb = _make_path_pb(element_pb1, element_pb2) + flat_path = self._call_fut(path_pb) + self.assertEqual(flat_path, (kind1, id1, kind2, name2)) -class Test__urlsafe_b64encode(unittest.TestCase): + def test_partial_key(self): + kind1 = 'grandparent' + name1 = 'cats' + element_pb1 = _make_element_pb(type=kind1, name=name1) + + kind2 = 'parent' + id2 = 1337 + element_pb2 = _make_element_pb(type=kind2, id=id2) + + kind3 = 'child' + element_pb3 = _make_element_pb(type=kind3) + + path_pb = _make_path_pb(element_pb1, element_pb2, element_pb3) + flat_path = self._call_fut(path_pb) + self.assertEqual(flat_path, (kind1, name1, kind2, id2, kind3)) + + +class Test__to_legacy_path(unittest.TestCase): @staticmethod - def _call_fut(value): - from google.cloud.datastore.key import _urlsafe_b64encode + def _call_fut(dict_path): + from google.cloud.datastore.key import _to_legacy_path + + return _to_legacy_path(dict_path) + + def test_one_pair(self): + kind = 'Widget' + name = 'Scooter' + dict_path = [{'kind': kind, 'name': name}] + path_pb = self._call_fut(dict_path) + + element_pb = _make_element_pb(type=kind, name=name) + expected_pb = _make_path_pb(element_pb) + self.assertEqual(path_pb, expected_pb) + + def test_two_pairs(self): + kind1 = 'parent' + id1 = 59 + + kind2 = 'child' + name2 = 'naem' + + dict_path = [{'kind': kind1, 'id': id1}, {'kind': kind2, 'name': name2}] + path_pb = self._call_fut(dict_path) + + element_pb1 = _make_element_pb(type=kind1, id=id1) + element_pb2 = _make_element_pb(type=kind2, name=name2) + expected_pb = _make_path_pb(element_pb1, element_pb2) + self.assertEqual(path_pb, expected_pb) + + def test_partial_key(self): + kind1 = 'grandparent' + name1 = 'cats' + + kind2 = 'parent' + id2 = 1337 + + kind3 = 'child' + + dict_path = [ + {'kind': kind1, 'name': name1}, + {'kind': kind2, 'id': id2}, + {'kind': kind3}, + ] + path_pb = self._call_fut(dict_path) + + element_pb1 = _make_element_pb(type=kind1, name=name1) + element_pb2 = _make_element_pb(type=kind2, id=id2) + element_pb3 = _make_element_pb(type=kind3) + expected_pb = _make_path_pb(element_pb1, element_pb2, element_pb3) + self.assertEqual(path_pb, expected_pb) + + +def _make_element_pb(**kwargs): + from google.cloud.datastore import _onestore_v3_pb2 + + return _onestore_v3_pb2.Path.Element(**kwargs) + + +def _make_path_pb(*element_pbs): + from google.cloud.datastore import _onestore_v3_pb2 - return _urlsafe_b64encode(value) + return _onestore_v3_pb2.Path(element=element_pbs) From 30a84ffa36857b4eb7931a107fbaded778d95f5a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 14:57:18 -0700 Subject: [PATCH 7/8] Adding LICENSE header to hand-written legacy GAE proto. --- .../google/cloud/datastore/_onestore_v3.proto | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/datastore/google/cloud/datastore/_onestore_v3.proto b/datastore/google/cloud/datastore/_onestore_v3.proto index 4d7501c78a8a..7248f1a4e4ef 100644 --- a/datastore/google/cloud/datastore/_onestore_v3.proto +++ b/datastore/google/cloud/datastore/_onestore_v3.proto @@ -1,3 +1,17 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + syntax = "proto2"; message Reference { From d32361eafd8fc3729e8150185f34ece4136072b3 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Thu, 8 Jun 2017 15:05:35 -0700 Subject: [PATCH 8/8] Renaming _onestore_v3.proto --> _app_engine_key.proto. --- datastore/.coveragerc | 2 +- datastore/.flake8 | 2 +- ...nestore_v3.proto => _app_engine_key.proto} | 0 ...store_v3_pb2.py => _app_engine_key_pb2.py} | 24 +++++++++---------- datastore/google/cloud/datastore/key.py | 16 ++++++------- datastore/tests/unit/test_key.py | 8 +++---- 6 files changed, 26 insertions(+), 26 deletions(-) rename datastore/google/cloud/datastore/{_onestore_v3.proto => _app_engine_key.proto} (100%) rename datastore/google/cloud/datastore/{_onestore_v3_pb2.py => _app_engine_key_pb2.py} (87%) diff --git a/datastore/.coveragerc b/datastore/.coveragerc index c462aa97654e..1596e4637d3f 100644 --- a/datastore/.coveragerc +++ b/datastore/.coveragerc @@ -3,7 +3,7 @@ branch = True [report] omit = - _onestore_v3_pb2.py + _app_engine_key_pb2.py fail_under = 100 show_missing = True exclude_lines = diff --git a/datastore/.flake8 b/datastore/.flake8 index 0325d8cef0f1..2feb7fefea2a 100644 --- a/datastore/.flake8 +++ b/datastore/.flake8 @@ -2,7 +2,7 @@ exclude = # Datastore includes generated code in the manual layer; # do not lint this. - google/cloud/datastore/_onestore_v3_pb2.py, + google/cloud/datastore/_app_engine_key_pb2.py, # Standard linting exemptions. __pycache__, diff --git a/datastore/google/cloud/datastore/_onestore_v3.proto b/datastore/google/cloud/datastore/_app_engine_key.proto similarity index 100% rename from datastore/google/cloud/datastore/_onestore_v3.proto rename to datastore/google/cloud/datastore/_app_engine_key.proto diff --git a/datastore/google/cloud/datastore/_onestore_v3_pb2.py b/datastore/google/cloud/datastore/_app_engine_key_pb2.py similarity index 87% rename from datastore/google/cloud/datastore/_onestore_v3_pb2.py rename to datastore/google/cloud/datastore/_app_engine_key_pb2.py index e9861d17fd40..bbb1c75b80df 100644 --- a/datastore/google/cloud/datastore/_onestore_v3_pb2.py +++ b/datastore/google/cloud/datastore/_app_engine_key_pb2.py @@ -1,5 +1,5 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! -# source: _onestore_v3.proto +# source: _app_engine_key.proto import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) @@ -16,10 +16,10 @@ DESCRIPTOR = _descriptor.FileDescriptor( - name='_onestore_v3.proto', + name='_app_engine_key.proto', package='', syntax='proto2', - serialized_pb=_b('\n\x12_onestore_v3.proto\"V\n\tReference\x12\x0b\n\x03\x61pp\x18\r \x02(\t\x12\x12\n\nname_space\x18\x14 \x01(\t\x12\x13\n\x04path\x18\x0e \x02(\x0b\x32\x05.Path\x12\x13\n\x0b\x64\x61tabase_id\x18\x17 \x01(\t\"Y\n\x04Path\x12\x1e\n\x07\x65lement\x18\x01 \x03(\n2\r.Path.Element\x1a\x31\n\x07\x45lement\x12\x0c\n\x04type\x18\x02 \x02(\t\x12\n\n\x02id\x18\x03 \x01(\x03\x12\x0c\n\x04name\x18\x04 \x01(\t') + serialized_pb=_b('\n\x15_app_engine_key.proto\"V\n\tReference\x12\x0b\n\x03\x61pp\x18\r \x02(\t\x12\x12\n\nname_space\x18\x14 \x01(\t\x12\x13\n\x04path\x18\x0e \x02(\x0b\x32\x05.Path\x12\x13\n\x0b\x64\x61tabase_id\x18\x17 \x01(\t\"Y\n\x04Path\x12\x1e\n\x07\x65lement\x18\x01 \x03(\n2\r.Path.Element\x1a\x31\n\x07\x45lement\x12\x0c\n\x04type\x18\x02 \x02(\t\x12\n\n\x02id\x18\x03 \x01(\x03\x12\x0c\n\x04name\x18\x04 \x01(\t') ) _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -73,8 +73,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=22, - serialized_end=108, + serialized_start=25, + serialized_end=111, ) @@ -118,8 +118,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=150, - serialized_end=199, + serialized_start=153, + serialized_end=202, ) _PATH = _descriptor.Descriptor( @@ -148,8 +148,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=110, - serialized_end=199, + serialized_start=113, + serialized_end=202, ) _REFERENCE.fields_by_name['path'].message_type = _PATH @@ -160,7 +160,7 @@ Reference = _reflection.GeneratedProtocolMessageType('Reference', (_message.Message,), dict( DESCRIPTOR = _REFERENCE, - __module__ = '_onestore_v3_pb2' + __module__ = '_app_engine_key_pb2' # @@protoc_insertion_point(class_scope:Reference) )) _sym_db.RegisterMessage(Reference) @@ -169,12 +169,12 @@ Element = _reflection.GeneratedProtocolMessageType('Element', (_message.Message,), dict( DESCRIPTOR = _PATH_ELEMENT, - __module__ = '_onestore_v3_pb2' + __module__ = '_app_engine_key_pb2' # @@protoc_insertion_point(class_scope:Path.Element) )) , DESCRIPTOR = _PATH, - __module__ = '_onestore_v3_pb2' + __module__ = '_app_engine_key_pb2' # @@protoc_insertion_point(class_scope:Path) )) _sym_db.RegisterMessage(Path) diff --git a/datastore/google/cloud/datastore/key.py b/datastore/google/cloud/datastore/key.py index 7c7e1836db91..166a5afde46b 100644 --- a/datastore/google/cloud/datastore/key.py +++ b/datastore/google/cloud/datastore/key.py @@ -21,7 +21,7 @@ from google.cloud.proto.datastore.v1 import entity_pb2 as _entity_pb2 from google.cloud._helpers import _to_bytes -from google.cloud.datastore import _onestore_v3_pb2 +from google.cloud.datastore import _app_engine_key_pb2 _DATABASE_ID_TEMPLATE = ( @@ -309,7 +309,7 @@ def to_legacy_urlsafe(self): :rtype: bytes :returns: A bytestring containing the key encoded as URL-safe base64. """ - reference = _onestore_v3_pb2.Reference( + reference = _app_engine_key_pb2.Reference( app=self.project, path=_to_legacy_path(self._path), # Avoid the copy. name_space=self.namespace, @@ -336,7 +336,7 @@ def from_legacy_urlsafe(cls, urlsafe): urlsafe = _to_bytes(urlsafe, encoding='ascii') raw_bytes = base64.urlsafe_b64decode(urlsafe) - reference = _onestore_v3_pb2.Reference() + reference = _app_engine_key_pb2.Reference() reference.ParseFromString(raw_bytes) project = _clean_app(reference.app) @@ -541,7 +541,7 @@ def _add_id_or_name(flat_path, element_pb, empty_allowed): :type flat_path: list :param flat_path: List of accumulated path parts. - :type element_pb: :class:`._onestore_v3_pb2.Path.Element` + :type element_pb: :class:`._app_engine_key_pb2.Path.Element` :param element_pb: The element containing ID or name. :type empty_allowed: bool @@ -585,7 +585,7 @@ def _get_flat_path(path_pb): would convert to ``('parent', 59, 'child', 'naem')``. - :type path_pb: :class:`._onestore_v3_pb2.Path` + :type path_pb: :class:`._app_engine_key_pb2.Path` :param path_pb: Legacy protobuf "Path" object (from a "Reference"). :rtype: tuple @@ -616,7 +616,7 @@ def _to_legacy_path(dict_path): is a list of dictionaries, each of which has "kind" and one of "name" / "id" as keys. - :rtype: :class:`._onestore_v3_pb2.Path` + :rtype: :class:`._app_engine_key_pb2.Path` :returns: The legacy path corresponding to ``dict_path``. """ elements = [] @@ -626,7 +626,7 @@ def _to_legacy_path(dict_path): element_kwargs['id'] = part['id'] elif 'name' in part: element_kwargs['name'] = part['name'] - element = _onestore_v3_pb2.Path.Element(**element_kwargs) + element = _app_engine_key_pb2.Path.Element(**element_kwargs) elements.append(element) - return _onestore_v3_pb2.Path(element=elements) + return _app_engine_key_pb2.Path(element=elements) diff --git a/datastore/tests/unit/test_key.py b/datastore/tests/unit/test_key.py index 99bf420205f0..5b89e146254d 100644 --- a/datastore/tests/unit/test_key.py +++ b/datastore/tests/unit/test_key.py @@ -699,12 +699,12 @@ def test_partial_key(self): def _make_element_pb(**kwargs): - from google.cloud.datastore import _onestore_v3_pb2 + from google.cloud.datastore import _app_engine_key_pb2 - return _onestore_v3_pb2.Path.Element(**kwargs) + return _app_engine_key_pb2.Path.Element(**kwargs) def _make_path_pb(*element_pbs): - from google.cloud.datastore import _onestore_v3_pb2 + from google.cloud.datastore import _app_engine_key_pb2 - return _onestore_v3_pb2.Path(element=element_pbs) + return _app_engine_key_pb2.Path(element=element_pbs)