Skip to content

Commit

Permalink
Removing cursor and more results from query object.
Browse files Browse the repository at this point in the history
This is a follow-up to a discussion in googleapis#423 and googleapis#425.

Also
- Adding fetch_page() method to return the needed properties
  no longer available on the Query object.
- Made fetch_page() raise a ValueError if more_results is `NOT_FINISHED`,
  a value which seems to be intended only for batched requests.
- Added start_cursor and end_cursor properties to Query.
- Updated docstrings to reflect change
- Updated tests to ensure the returned cursor was always set.
- Streamlined fetch()/fetch_page() tests to run via a single
  tweakable helper.
- Updated a datastore.connection test to construct and return an
  initialized QueryResultBatch to simulate a valid cursor.
- Updated regression.clear_datastore and regression.datastore to
  reflect changed API surface.
  • Loading branch information
dhermes committed Dec 17, 2014
1 parent c2ba130 commit 2d40365
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 159 deletions.
9 changes: 6 additions & 3 deletions gcloud/datastore/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,13 @@ def run_query(self, dataset_id, query_pb, namespace=None):
Using the ``fetch`` method...
>>> query.fetch()
>>> entities, cursor, more_results = query.fetch_page()
>>> entities
[<list of Entity unmarshalled from protobuf>]
>>> query.cursor()
>>> cursor
<string containing cursor where fetch stopped>
>>> more_results
<boolean of more results>
Under the hood this is doing...
Expand All @@ -318,7 +321,7 @@ def run_query(self, dataset_id, query_pb, namespace=None):
datastore_pb.RunQueryResponse)
return (
[e.entity for e in response.batch.entity_result],
response.batch.end_cursor,
response.batch.end_cursor, # Assume response always has cursor.
response.batch.more_results,
response.batch.skipped_results,
)
Expand Down
127 changes: 83 additions & 44 deletions gcloud/datastore/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class Query(object):
:param namespace: The namespace to which to restrict results.
"""

_MORE_RESULTS = datastore_pb.QueryResultBatch.MORE_RESULTS_AFTER_LIMIT
_NO_MORE_RESULTS = datastore_pb.QueryResultBatch.NO_MORE_RESULTS
OPERATORS = {
'<=': datastore_pb.PropertyFilter.LESS_THAN_OR_EQUAL,
'>=': datastore_pb.PropertyFilter.GREATER_THAN_OR_EQUAL,
Expand All @@ -69,7 +71,6 @@ def __init__(self, kind=None, dataset=None, namespace=None):
self._dataset = dataset
self._namespace = namespace
self._pb = datastore_pb.Query()
self._cursor = self._more_results = None
self._offset = 0

if kind:
Expand All @@ -84,8 +85,6 @@ def _clone(self):
clone = self.__class__(dataset=self._dataset,
namespace=self._namespace)
clone._pb.CopyFrom(self._pb)
clone._cursor = self._cursor
clone._more_results = self._more_results
return clone

def namespace(self):
Expand Down Expand Up @@ -239,8 +238,8 @@ def kind(self, *kinds):
:type kinds: string
:param kinds: The entity kinds for which to query.
:rtype: string or :class:`Query`
:returns: If no arguments, returns the kind.
:rtype: string, list of strings, or :class:`Query`
:returns: If no arguments, returns the kind or list of kinds.
If a kind is provided, returns a clone of the :class:`Query`
with those kinds set.
"""
Expand All @@ -250,7 +249,13 @@ def kind(self, *kinds):
clone._pb.kind.add().name = kind
return clone
else:
return self._pb.kind
# In the proto definition for Query, `kind` is repeated.
kind_names = [kind_expr.name for kind_expr in self._pb.kind]
num_kinds = len(kind_names)
if num_kinds == 1:
return kind_names[0]
elif num_kinds > 1:
return kind_names

def limit(self, limit=None):
"""Get or set the limit of the Query.
Expand Down Expand Up @@ -302,8 +307,12 @@ def dataset(self, dataset=None):
else:
return self._dataset

def fetch(self, limit=None):
"""Executes the Query and returns all matching entities.
def fetch_page(self, limit=None):
"""Executes the Query and returns matching entities and paging info.
In addition to the fetched entities, it also returns a cursor to allow
paging through a result set and a boolean `more_results` indicating
whether there are any more results.
This makes an API call to the Cloud Datastore, sends the Query
as a protobuf, parses the responses to Entity protobufs, and
Expand All @@ -315,10 +324,10 @@ def fetch(self, limit=None):
>>> from gcloud import datastore
>>> dataset = datastore.get_dataset('dataset-id')
>>> query = dataset.query('Person').filter('name', '=', 'Sally')
>>> query.fetch()
[<Entity object>, <Entity object>, ...]
>>> query.fetch(1)
[<Entity object>]
>>> query.fetch_page()
([<Entity object>, <Entity object>, ...], 'cursorbase64', True)
>>> query.fetch_page(1)
([<Entity object>], 'cursorbase64', True)
>>> query.limit()
None
Expand All @@ -328,8 +337,13 @@ def fetch(self, limit=None):
but the limit will be applied to the query
before it is executed.
:rtype: list of :class:`gcloud.datastore.entity.Entity`'s
:returns: The list of entities matching this query's criteria.
:rtype: tuple of mixed types
:returns: The first entry is a :class:`gcloud.datastore.entity.Entity`
list matching this query's criteria. The second is a
base64-encoded cursor for paging and the third is a boolean
indicating if there are more results.
:raises: `ValueError` if more_results is not one of the enums
MORE_RESULTS_AFTER_LIMIT or NO_MORE_RESULTS.
"""
clone = self

Expand All @@ -350,46 +364,71 @@ def fetch(self, limit=None):
# results. See
# https://github.com/GoogleCloudPlatform/gcloud-python/issues/280
# for discussion.
entity_pbs, self._cursor, self._more_results = query_results[:3]
entity_pbs, cursor_as_bytes, more_results_enum = query_results[:3]

return [helpers.entity_from_protobuf(entity, dataset=self.dataset())
for entity in entity_pbs]
entities = [helpers.entity_from_protobuf(entity,
dataset=self.dataset())
for entity in entity_pbs]

def cursor(self):
"""Returns cursor ID from most recent ``fetch()``.
cursor = base64.b64encode(cursor_as_bytes)

.. warning:: Invoking this method on a query that has not yet
been executed will raise a RuntimeError.
if more_results_enum == self._MORE_RESULTS:
more_results = True
elif more_results_enum == self._NO_MORE_RESULTS:
more_results = False
else:
# Note: this also covers the value NOT_FINISHED. Since this fetch
# does not occur within a batch, we don't expect to see NOT_FINISHED.
raise ValueError('Unexpected value returned for `more_results`.')

:rtype: string
:returns: base64-encoded cursor ID string denoting the last position
consumed in the query's result set.
"""
if not self._cursor:
raise RuntimeError('No cursor')
return base64.b64encode(self._cursor)
return entities, cursor, more_results

def more_results(self):
"""Returns ``more_results`` flag from most recent ``fetch()``.
def fetch(self, limit=None):
"""Executes the Query and returns matching entities.
.. warning:: Invoking this method on a query that has not yet
been executed will raise a RuntimeError.
This calls `fetch_page()` but does not use the paging information.
.. note::
For example::
>>> from gcloud import datastore
>>> dataset = datastore.get_dataset('dataset-id')
>>> query = dataset.query('Person').filter('name', '=', 'Sally')
>>> query.fetch()
[<Entity object>, <Entity object>, ...]
>>> query.fetch(1)
[<Entity object>]
>>> query.limit()
None
The `more_results` is not currently useful because it is
always returned by the back-end as ``MORE_RESULTS_AFTER_LIMIT``
even if there are no more results. See
https://github.com/GoogleCloudPlatform/gcloud-python/issues/280
for discussion.
:type limit: integer
:param limit: An optional limit to apply temporarily to this query.
That is, the Query itself won't be altered,
but the limit will be applied to the query
before it is executed.
:rtype: :class:`gcloud.datastore.datastore_v1_pb2.
QueryResultBatch.MoreResultsType`
:returns: enumerated value: are there more results available.
:rtype: list of :class:`gcloud.datastore.entity.Entity`'s
:returns: The list of entities matching this query's criteria.
"""
if self._more_results is None:
raise RuntimeError('No results')
return self._more_results
entities, _, _ = self.fetch_page(limit=limit)
return entities

@property
def start_cursor(self):
"""Property to encode start cursor bytes as base64."""
if not self._pb.HasField('start_cursor'):
return None

start_as_bytes = self._pb.start_cursor
return base64.b64encode(start_as_bytes)

@property
def end_cursor(self):
"""Property to encode end cursor bytes as base64."""
if not self._pb.HasField('end_cursor'):
return None

end_as_bytes = self._pb.end_cursor
return base64.b64encode(end_as_bytes)

def with_cursor(self, start_cursor, end_cursor=None):
"""Specifies the starting / ending positions in a query's result set.
Expand Down
7 changes: 6 additions & 1 deletion gcloud/datastore/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,13 @@ def test_run_query_wo_namespace_empty_result(self):

DATASET_ID = 'DATASET'
KIND = 'Nonesuch'
CURSOR = b'\x00'
q_pb = Query(KIND, DATASET_ID).to_protobuf()
rsp_pb = datastore_pb.RunQueryResponse()
rsp_pb.batch.end_cursor = CURSOR
no_more = datastore_pb.QueryResultBatch.NO_MORE_RESULTS
rsp_pb.batch.more_results = no_more
rsp_pb.batch.entity_result_type = datastore_pb.EntityResult.FULL
conn = self._makeOne()
URI = '/'.join([
conn.API_BASE_URL,
Expand All @@ -463,7 +468,7 @@ def test_run_query_wo_namespace_empty_result(self):
http = conn._http = Http({'status': '200'}, rsp_pb.SerializeToString())
pbs, end, more, skipped = conn.run_query(DATASET_ID, q_pb)
self.assertEqual(pbs, [])
self.assertEqual(end, '')
self.assertEqual(end, CURSOR)
self.assertTrue(more)
self.assertEqual(skipped, 0)
cw = http._called_with
Expand Down
Loading

0 comments on commit 2d40365

Please sign in to comment.