From 8c5fd7de5d42d198238418b0cf1d89f68c742935 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 10:36:52 -0800 Subject: [PATCH 001/349] moved previous client to deprecated, added files for new client --- google/cloud/bigtable/__init__.py | 15 +- google/cloud/bigtable/client.py | 508 +---------------- google/cloud/bigtable/deprecated/__init__.py | 25 + .../bigtable/{ => deprecated}/app_profile.py | 0 .../cloud/bigtable/{ => deprecated}/backup.py | 0 .../bigtable/{ => deprecated}/batcher.py | 0 google/cloud/bigtable/deprecated/client.py | 513 ++++++++++++++++++ .../bigtable/{ => deprecated}/cluster.py | 0 .../{ => deprecated}/column_family.py | 0 .../{ => deprecated}/encryption_info.py | 0 .../cloud/bigtable/{ => deprecated}/enums.py | 0 .../cloud/bigtable/{ => deprecated}/error.py | 0 .../{ => deprecated}/gapic_version.py | 0 .../bigtable/{ => deprecated}/instance.py | 0 .../cloud/bigtable/{ => deprecated}/policy.py | 0 .../cloud/bigtable/{ => deprecated}/py.typed | 0 google/cloud/bigtable/{ => deprecated}/row.py | 0 .../bigtable/{ => deprecated}/row_data.py | 0 .../bigtable/{ => deprecated}/row_filters.py | 0 .../bigtable/{ => deprecated}/row_merger.py | 0 .../bigtable/{ => deprecated}/row_set.py | 0 .../cloud/bigtable/{ => deprecated}/table.py | 0 22 files changed, 563 insertions(+), 498 deletions(-) create mode 100644 google/cloud/bigtable/deprecated/__init__.py rename google/cloud/bigtable/{ => deprecated}/app_profile.py (100%) rename google/cloud/bigtable/{ => deprecated}/backup.py (100%) rename google/cloud/bigtable/{ => deprecated}/batcher.py (100%) create mode 100644 google/cloud/bigtable/deprecated/client.py rename google/cloud/bigtable/{ => deprecated}/cluster.py (100%) rename google/cloud/bigtable/{ => deprecated}/column_family.py (100%) rename google/cloud/bigtable/{ => deprecated}/encryption_info.py (100%) rename google/cloud/bigtable/{ => deprecated}/enums.py (100%) rename google/cloud/bigtable/{ => deprecated}/error.py (100%) rename google/cloud/bigtable/{ => deprecated}/gapic_version.py (100%) rename google/cloud/bigtable/{ => deprecated}/instance.py (100%) rename google/cloud/bigtable/{ => deprecated}/policy.py (100%) rename google/cloud/bigtable/{ => deprecated}/py.typed (100%) rename google/cloud/bigtable/{ => deprecated}/row.py (100%) rename google/cloud/bigtable/{ => deprecated}/row_data.py (100%) rename google/cloud/bigtable/{ => deprecated}/row_filters.py (100%) rename google/cloud/bigtable/{ => deprecated}/row_merger.py (100%) rename google/cloud/bigtable/{ => deprecated}/row_set.py (100%) rename google/cloud/bigtable/{ => deprecated}/table.py (100%) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 7331ff241..44b5092f4 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -1,4 +1,5 @@ -# Copyright 2015 Google LLC +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -"""Google Cloud Bigtable API package.""" - -from google.cloud.bigtable.client import Client - +# from google.cloud.bigtable import gapic_version as package_version -__version__: str - __version__ = package_version.__version__ -__all__ = ["__version__", "Client"] +from google.cloud.bigtable_async.client import BigtableDataClient + +__all__ = ("BigtableDataClient") diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c82a268c6..ede268cb0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -1,4 +1,4 @@ -# Copyright 2015 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,503 +11,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -"""Parent client for calling the Google Cloud Bigtable API. - -This is the base from which all interactions with the API occur. - -In the hierarchy of API concepts - -* a :class:`~google.cloud.bigtable.client.Client` owns an - :class:`~google.cloud.bigtable.instance.Instance` -* an :class:`~google.cloud.bigtable.instance.Instance` owns a - :class:`~google.cloud.bigtable.table.Table` -* a :class:`~google.cloud.bigtable.table.Table` owns a - :class:`~.column_family.ColumnFamily` -* a :class:`~google.cloud.bigtable.table.Table` owns a - :class:`~google.cloud.bigtable.row.Row` (and all the cells in the row) -""" -import os -import warnings -import grpc # type: ignore - -from google.api_core.gapic_v1 import client_info as client_info_lib -import google.auth # type: ignore -from google.auth.credentials import AnonymousCredentials # type: ignore - -from google.cloud import bigtable_v2 -from google.cloud import bigtable_admin_v2 -from google.cloud.bigtable_v2.services.bigtable.transports import BigtableGrpcTransport -from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin.transports import ( - BigtableInstanceAdminGrpcTransport, -) -from google.cloud.bigtable_admin_v2.services.bigtable_table_admin.transports import ( - BigtableTableAdminGrpcTransport, -) - -from google.cloud import bigtable -from google.cloud.bigtable.instance import Instance -from google.cloud.bigtable.cluster import Cluster - -from google.cloud.client import ClientWithProject # type: ignore - -from google.cloud.bigtable_admin_v2.types import instance -from google.cloud.bigtable.cluster import _CLUSTER_NAME_RE -from google.cloud.environment_vars import BIGTABLE_EMULATOR # type: ignore - - -INSTANCE_TYPE_PRODUCTION = instance.Instance.Type.PRODUCTION -INSTANCE_TYPE_DEVELOPMENT = instance.Instance.Type.DEVELOPMENT -INSTANCE_TYPE_UNSPECIFIED = instance.Instance.Type.TYPE_UNSPECIFIED -SPANNER_ADMIN_SCOPE = "https://www.googleapis.com/auth/spanner.admin" -ADMIN_SCOPE = "https://www.googleapis.com/auth/bigtable.admin" -"""Scope for interacting with the Cluster Admin and Table Admin APIs.""" -DATA_SCOPE = "https://www.googleapis.com/auth/bigtable.data" -"""Scope for reading and writing table data.""" -READ_ONLY_SCOPE = "https://www.googleapis.com/auth/bigtable.data.readonly" -"""Scope for reading table data.""" - -_DEFAULT_BIGTABLE_EMULATOR_CLIENT = "google-cloud-bigtable-emulator" -_GRPC_CHANNEL_OPTIONS = ( - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ("grpc.keepalive_time_ms", 30000), - ("grpc.keepalive_timeout_ms", 10000), -) - - 
-def _create_gapic_client(client_class, client_options=None, transport=None): - def inner(self): - return client_class( - credentials=None, - client_info=self._client_info, - client_options=client_options, - transport=transport, - ) - - return inner - - -class Client(ClientWithProject): - """Client for interacting with Google Cloud Bigtable API. - - .. note:: - - Since the Cloud Bigtable API requires the gRPC transport, no - ``_http`` argument is accepted by this class. - - :type project: :class:`str` or :func:`unicode ` - :param project: (Optional) The ID of the project which owns the - instances, tables and data. If not provided, will - attempt to determine from the environment. - - :type credentials: :class:`~google.auth.credentials.Credentials` - :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed, falls back to the default - inferred from the environment. - - :type read_only: bool - :param read_only: (Optional) Boolean indicating if the data scope should be - for reading only (or for writing as well). Defaults to - :data:`False`. - - :type admin: bool - :param admin: (Optional) Boolean indicating if the client will be used to - interact with the Instance Admin or Table Admin APIs. This - requires the :const:`ADMIN_SCOPE`. Defaults to :data:`False`. - - :type: client_info: :class:`google.api_core.gapic_v1.client_info.ClientInfo` - :param client_info: - The client info used to send a user-agent string along with API - requests. If ``None``, then default info will be used. Generally, - you only need to set this if you're developing your own library - or partner tool. - - :type client_options: :class:`~google.api_core.client_options.ClientOptions` - or :class:`dict` - :param client_options: (Optional) Client options used to set user options - on the client. API Endpoint should be set through client_options. - - :type admin_client_options: - :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` - :param admin_client_options: (Optional) Client options used to set user - options on the client. API Endpoint for admin operations should be set - through admin_client_options. - - :type channel: :instance: grpc.Channel - :param channel (grpc.Channel): (Optional) DEPRECATED: - A ``Channel`` instance through which to make calls. - This argument is mutually exclusive with ``credentials``; - providing both will raise an exception. No longer used. - - :raises: :class:`ValueError ` if both ``read_only`` - and ``admin`` are :data:`True` - """ +from __future__ import annotations - _table_data_client = None - _table_admin_client = None - _instance_admin_client = None +from google.cloud.client import ClientWithProject +class BigtableDataClient(ClientWithProject): def __init__( self, - project=None, - credentials=None, - read_only=False, - admin=False, - client_info=None, - client_options=None, - admin_client_options=None, - channel=None, + *, + project: str|None = None, + credentials: google.auth.credentials.Credentials|None = None, + client_options: dict[str, Any] | "google.api_core.client_options.ClientOptions" | None = None, + metadata: list[tuple[str, str]]|None = None, ): - if client_info is None: - client_info = client_info_lib.ClientInfo( - client_library_version=bigtable.__version__, - ) - if read_only and admin: - raise ValueError( - "A read-only client cannot also perform" "administrative actions." - ) - - # NOTE: We set the scopes **before** calling the parent constructor. - # It **may** use those scopes in ``with_scopes_if_required``. 
- self._read_only = bool(read_only) - self._admin = bool(admin) - self._client_info = client_info - self._emulator_host = os.getenv(BIGTABLE_EMULATOR) - - if self._emulator_host is not None: - if credentials is None: - credentials = AnonymousCredentials() - if project is None: - project = _DEFAULT_BIGTABLE_EMULATOR_CLIENT - - if channel is not None: - warnings.warn( - "'channel' is deprecated and no longer used.", - DeprecationWarning, - stacklevel=2, - ) - - self._client_options = client_options - self._admin_client_options = admin_client_options - self._channel = channel - self.SCOPE = self._get_scopes() - super(Client, self).__init__( - project=project, - credentials=credentials, - client_options=client_options, - ) - - def _get_scopes(self): - """Get the scopes corresponding to admin / read-only state. - - Returns: - Tuple[str, ...]: The tuple of scopes. """ - if self._read_only: - scopes = (READ_ONLY_SCOPE,) - else: - scopes = (DATA_SCOPE,) + Create a client instance - if self._admin: - scopes += (ADMIN_SCOPE,) - - return scopes - - def _emulator_channel(self, transport, options): - """Create a channel using self._credentials - - Works in a similar way to ``grpc.secure_channel`` but using - ``grpc.local_channel_credentials`` rather than - ``grpc.ssh_channel_credentials`` to allow easy connection to a - local emulator. - - Returns: - grpc.Channel or grpc.aio.Channel + Args: + metadata: a list of metadata headers to be attached to all calls with this client """ - # TODO: Implement a special credentials type for emulator and use - # "transport.create_channel" to create gRPC channels once google-auth - # extends it's allowed credentials types. - # Note: this code also exists in the firestore client. - if "GrpcAsyncIOTransport" in str(transport.__name__): - return grpc.aio.secure_channel( - self._emulator_host, - self._local_composite_credentials(), - options=options, - ) - else: - return grpc.secure_channel( - self._emulator_host, - self._local_composite_credentials(), - options=options, - ) - - def _local_composite_credentials(self): - """Create credentials for the local emulator channel. - - :return: grpc.ChannelCredentials - """ - credentials = google.auth.credentials.with_scopes_if_required( - self._credentials, None - ) - request = google.auth.transport.requests.Request() - - # Create the metadata plugin for inserting the authorization header. - metadata_plugin = google.auth.transport.grpc.AuthMetadataPlugin( - credentials, request - ) - - # Create a set of grpc.CallCredentials using the metadata plugin. - google_auth_credentials = grpc.metadata_call_credentials(metadata_plugin) - - # Using the local_credentials to allow connection to emulator - local_credentials = grpc.local_channel_credentials() - - # Combine the local credentials and the authorization credentials. 
- return grpc.composite_channel_credentials( - local_credentials, google_auth_credentials - ) + pass - def _create_gapic_client_channel(self, client_class, grpc_transport): - if self._emulator_host is not None: - api_endpoint = self._emulator_host - elif self._client_options and self._client_options.api_endpoint: - api_endpoint = self._client_options.api_endpoint - else: - api_endpoint = client_class.DEFAULT_ENDPOINT - if self._emulator_host is not None: - channel = self._emulator_channel( - transport=grpc_transport, - options=_GRPC_CHANNEL_OPTIONS, - ) - else: - channel = grpc_transport.create_channel( - host=api_endpoint, - credentials=self._credentials, - options=_GRPC_CHANNEL_OPTIONS, - ) - return grpc_transport(channel=channel, host=api_endpoint) + def get_table(instance_id:str, table_id:str, app_profile_id:str|None=None) -> Table: + pass - @property - def project_path(self): - """Project name to be used with Instance Admin API. - - .. note:: - - This property will not change if ``project`` does not, but the - return value is not cached. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_project_path] - :end-before: [END bigtable_api_project_path] - :dedent: 4 - - The project name is of the form - - ``"projects/{project}"`` - - :rtype: str - :returns: Return a fully-qualified project string. - """ - return self.instance_admin_client.common_project_path(self.project) - - @property - def table_data_client(self): - """Getter for the gRPC stub used for the Table Admin API. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_table_data_client] - :end-before: [END bigtable_api_table_data_client] - :dedent: 4 - - :rtype: :class:`.bigtable_v2.BigtableClient` - :returns: A BigtableClient object. - """ - if self._table_data_client is None: - transport = self._create_gapic_client_channel( - bigtable_v2.BigtableClient, - BigtableGrpcTransport, - ) - klass = _create_gapic_client( - bigtable_v2.BigtableClient, - client_options=self._client_options, - transport=transport, - ) - self._table_data_client = klass(self) - return self._table_data_client - - @property - def table_admin_client(self): - """Getter for the gRPC stub used for the Table Admin API. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_table_admin_client] - :end-before: [END bigtable_api_table_admin_client] - :dedent: 4 - - :rtype: :class:`.bigtable_admin_pb2.BigtableTableAdmin` - :returns: A BigtableTableAdmin instance. - :raises: :class:`ValueError ` if the current - client is not an admin client or if it has not been - :meth:`start`-ed. - """ - if self._table_admin_client is None: - if not self._admin: - raise ValueError("Client is not an admin client.") - - transport = self._create_gapic_client_channel( - bigtable_admin_v2.BigtableTableAdminClient, - BigtableTableAdminGrpcTransport, - ) - klass = _create_gapic_client( - bigtable_admin_v2.BigtableTableAdminClient, - client_options=self._admin_client_options, - transport=transport, - ) - self._table_admin_client = klass(self) - return self._table_admin_client - - @property - def instance_admin_client(self): - """Getter for the gRPC stub used for the Table Admin API. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_instance_admin_client] - :end-before: [END bigtable_api_instance_admin_client] - :dedent: 4 - - :rtype: :class:`.bigtable_admin_pb2.BigtableInstanceAdmin` - :returns: A BigtableInstanceAdmin instance. 
- :raises: :class:`ValueError ` if the current - client is not an admin client or if it has not been - :meth:`start`-ed. - """ - if self._instance_admin_client is None: - if not self._admin: - raise ValueError("Client is not an admin client.") - - transport = self._create_gapic_client_channel( - bigtable_admin_v2.BigtableInstanceAdminClient, - BigtableInstanceAdminGrpcTransport, - ) - klass = _create_gapic_client( - bigtable_admin_v2.BigtableInstanceAdminClient, - client_options=self._admin_client_options, - transport=transport, - ) - self._instance_admin_client = klass(self) - return self._instance_admin_client - - def instance(self, instance_id, display_name=None, instance_type=None, labels=None): - """Factory to create a instance associated with this client. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_create_prod_instance] - :end-before: [END bigtable_api_create_prod_instance] - :dedent: 4 - - :type instance_id: str - :param instance_id: The ID of the instance. - - :type display_name: str - :param display_name: (Optional) The display name for the instance in - the Cloud Console UI. (Must be between 4 and 30 - characters.) If this value is not set in the - constructor, will fall back to the instance ID. - - :type instance_type: int - :param instance_type: (Optional) The type of the instance. - Possible values are represented - by the following constants: - :data:`google.cloud.bigtable.instance.InstanceType.PRODUCTION`. - :data:`google.cloud.bigtable.instance.InstanceType.DEVELOPMENT`, - Defaults to - :data:`google.cloud.bigtable.instance.InstanceType.UNSPECIFIED`. - - :type labels: dict - :param labels: (Optional) Labels are a flexible and lightweight - mechanism for organizing cloud resources into groups - that reflect a customer's organizational needs and - deployment strategies. They can be used to filter - resources and aggregate metrics. Label keys must be - between 1 and 63 characters long. Maximum 64 labels can - be associated with a given resource. Label values must - be between 0 and 63 characters long. Keys and values - must both be under 128 bytes. - - :rtype: :class:`~google.cloud.bigtable.instance.Instance` - :returns: an instance owned by this client. - """ - return Instance( - instance_id, - self, - display_name=display_name, - instance_type=instance_type, - labels=labels, - ) - - def list_instances(self): - """List instances owned by the project. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_list_instances] - :end-before: [END bigtable_api_list_instances] - :dedent: 4 - - :rtype: tuple - :returns: - (instances, failed_locations), where 'instances' is list of - :class:`google.cloud.bigtable.instance.Instance`, and - 'failed_locations' is a list of locations which could not - be resolved. - """ - resp = self.instance_admin_client.list_instances( - request={"parent": self.project_path} - ) - instances = [Instance.from_pb(instance, self) for instance in resp.instances] - return instances, resp.failed_locations - - def list_clusters(self): - """List the clusters in the project. - - For example: - - .. 
literalinclude:: snippets.py - :start-after: [START bigtable_api_list_clusters_in_project] - :end-before: [END bigtable_api_list_clusters_in_project] - :dedent: 4 - - :rtype: tuple - :returns: - (clusters, failed_locations), where 'clusters' is list of - :class:`google.cloud.bigtable.instance.Cluster`, and - 'failed_locations' is a list of strings representing - locations which could not be resolved. - """ - resp = self.instance_admin_client.list_clusters( - request={ - "parent": self.instance_admin_client.instance_path(self.project, "-") - } - ) - clusters = [] - instances = {} - for cluster in resp.clusters: - match_cluster_name = _CLUSTER_NAME_RE.match(cluster.name) - instance_id = match_cluster_name.group("instance") - if instance_id not in instances: - instances[instance_id] = self.instance(instance_id) - clusters.append(Cluster.from_pb(cluster, instances[instance_id])) - return clusters, resp.failed_locations +if __name__ == "__main__": + client = BigtableDataClient() + client.get_table("instance_id", "table_id") diff --git a/google/cloud/bigtable/deprecated/__init__.py b/google/cloud/bigtable/deprecated/__init__.py new file mode 100644 index 000000000..7331ff241 --- /dev/null +++ b/google/cloud/bigtable/deprecated/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Bigtable API package.""" + +from google.cloud.bigtable.client import Client + +from google.cloud.bigtable import gapic_version as package_version + +__version__: str + +__version__ = package_version.__version__ + +__all__ = ["__version__", "Client"] diff --git a/google/cloud/bigtable/app_profile.py b/google/cloud/bigtable/deprecated/app_profile.py similarity index 100% rename from google/cloud/bigtable/app_profile.py rename to google/cloud/bigtable/deprecated/app_profile.py diff --git a/google/cloud/bigtable/backup.py b/google/cloud/bigtable/deprecated/backup.py similarity index 100% rename from google/cloud/bigtable/backup.py rename to google/cloud/bigtable/deprecated/backup.py diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/deprecated/batcher.py similarity index 100% rename from google/cloud/bigtable/batcher.py rename to google/cloud/bigtable/deprecated/batcher.py diff --git a/google/cloud/bigtable/deprecated/client.py b/google/cloud/bigtable/deprecated/client.py new file mode 100644 index 000000000..c82a268c6 --- /dev/null +++ b/google/cloud/bigtable/deprecated/client.py @@ -0,0 +1,513 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parent client for calling the Google Cloud Bigtable API. + +This is the base from which all interactions with the API occur. + +In the hierarchy of API concepts + +* a :class:`~google.cloud.bigtable.client.Client` owns an + :class:`~google.cloud.bigtable.instance.Instance` +* an :class:`~google.cloud.bigtable.instance.Instance` owns a + :class:`~google.cloud.bigtable.table.Table` +* a :class:`~google.cloud.bigtable.table.Table` owns a + :class:`~.column_family.ColumnFamily` +* a :class:`~google.cloud.bigtable.table.Table` owns a + :class:`~google.cloud.bigtable.row.Row` (and all the cells in the row) +""" +import os +import warnings +import grpc # type: ignore + +from google.api_core.gapic_v1 import client_info as client_info_lib +import google.auth # type: ignore +from google.auth.credentials import AnonymousCredentials # type: ignore + +from google.cloud import bigtable_v2 +from google.cloud import bigtable_admin_v2 +from google.cloud.bigtable_v2.services.bigtable.transports import BigtableGrpcTransport +from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin.transports import ( + BigtableInstanceAdminGrpcTransport, +) +from google.cloud.bigtable_admin_v2.services.bigtable_table_admin.transports import ( + BigtableTableAdminGrpcTransport, +) + +from google.cloud import bigtable +from google.cloud.bigtable.instance import Instance +from google.cloud.bigtable.cluster import Cluster + +from google.cloud.client import ClientWithProject # type: ignore + +from google.cloud.bigtable_admin_v2.types import instance +from google.cloud.bigtable.cluster import _CLUSTER_NAME_RE +from google.cloud.environment_vars import BIGTABLE_EMULATOR # type: ignore + + +INSTANCE_TYPE_PRODUCTION = instance.Instance.Type.PRODUCTION +INSTANCE_TYPE_DEVELOPMENT = instance.Instance.Type.DEVELOPMENT +INSTANCE_TYPE_UNSPECIFIED = instance.Instance.Type.TYPE_UNSPECIFIED +SPANNER_ADMIN_SCOPE = "https://www.googleapis.com/auth/spanner.admin" +ADMIN_SCOPE = "https://www.googleapis.com/auth/bigtable.admin" +"""Scope for interacting with the Cluster Admin and Table Admin APIs.""" +DATA_SCOPE = "https://www.googleapis.com/auth/bigtable.data" +"""Scope for reading and writing table data.""" +READ_ONLY_SCOPE = "https://www.googleapis.com/auth/bigtable.data.readonly" +"""Scope for reading table data.""" + +_DEFAULT_BIGTABLE_EMULATOR_CLIENT = "google-cloud-bigtable-emulator" +_GRPC_CHANNEL_OPTIONS = ( + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ("grpc.keepalive_time_ms", 30000), + ("grpc.keepalive_timeout_ms", 10000), +) + + +def _create_gapic_client(client_class, client_options=None, transport=None): + def inner(self): + return client_class( + credentials=None, + client_info=self._client_info, + client_options=client_options, + transport=transport, + ) + + return inner + + +class Client(ClientWithProject): + """Client for interacting with Google Cloud Bigtable API. + + .. note:: + + Since the Cloud Bigtable API requires the gRPC transport, no + ``_http`` argument is accepted by this class. + + :type project: :class:`str` or :func:`unicode ` + :param project: (Optional) The ID of the project which owns the + instances, tables and data. If not provided, will + attempt to determine from the environment. + + :type credentials: :class:`~google.auth.credentials.Credentials` + :param credentials: (Optional) The OAuth2 Credentials to use for this + client. 
If not passed, falls back to the default + inferred from the environment. + + :type read_only: bool + :param read_only: (Optional) Boolean indicating if the data scope should be + for reading only (or for writing as well). Defaults to + :data:`False`. + + :type admin: bool + :param admin: (Optional) Boolean indicating if the client will be used to + interact with the Instance Admin or Table Admin APIs. This + requires the :const:`ADMIN_SCOPE`. Defaults to :data:`False`. + + :type: client_info: :class:`google.api_core.gapic_v1.client_info.ClientInfo` + :param client_info: + The client info used to send a user-agent string along with API + requests. If ``None``, then default info will be used. Generally, + you only need to set this if you're developing your own library + or partner tool. + + :type client_options: :class:`~google.api_core.client_options.ClientOptions` + or :class:`dict` + :param client_options: (Optional) Client options used to set user options + on the client. API Endpoint should be set through client_options. + + :type admin_client_options: + :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` + :param admin_client_options: (Optional) Client options used to set user + options on the client. API Endpoint for admin operations should be set + through admin_client_options. + + :type channel: :instance: grpc.Channel + :param channel (grpc.Channel): (Optional) DEPRECATED: + A ``Channel`` instance through which to make calls. + This argument is mutually exclusive with ``credentials``; + providing both will raise an exception. No longer used. + + :raises: :class:`ValueError ` if both ``read_only`` + and ``admin`` are :data:`True` + """ + + _table_data_client = None + _table_admin_client = None + _instance_admin_client = None + + def __init__( + self, + project=None, + credentials=None, + read_only=False, + admin=False, + client_info=None, + client_options=None, + admin_client_options=None, + channel=None, + ): + if client_info is None: + client_info = client_info_lib.ClientInfo( + client_library_version=bigtable.__version__, + ) + if read_only and admin: + raise ValueError( + "A read-only client cannot also perform" "administrative actions." + ) + + # NOTE: We set the scopes **before** calling the parent constructor. + # It **may** use those scopes in ``with_scopes_if_required``. + self._read_only = bool(read_only) + self._admin = bool(admin) + self._client_info = client_info + self._emulator_host = os.getenv(BIGTABLE_EMULATOR) + + if self._emulator_host is not None: + if credentials is None: + credentials = AnonymousCredentials() + if project is None: + project = _DEFAULT_BIGTABLE_EMULATOR_CLIENT + + if channel is not None: + warnings.warn( + "'channel' is deprecated and no longer used.", + DeprecationWarning, + stacklevel=2, + ) + + self._client_options = client_options + self._admin_client_options = admin_client_options + self._channel = channel + self.SCOPE = self._get_scopes() + super(Client, self).__init__( + project=project, + credentials=credentials, + client_options=client_options, + ) + + def _get_scopes(self): + """Get the scopes corresponding to admin / read-only state. + + Returns: + Tuple[str, ...]: The tuple of scopes. 
+ """ + if self._read_only: + scopes = (READ_ONLY_SCOPE,) + else: + scopes = (DATA_SCOPE,) + + if self._admin: + scopes += (ADMIN_SCOPE,) + + return scopes + + def _emulator_channel(self, transport, options): + """Create a channel using self._credentials + + Works in a similar way to ``grpc.secure_channel`` but using + ``grpc.local_channel_credentials`` rather than + ``grpc.ssh_channel_credentials`` to allow easy connection to a + local emulator. + + Returns: + grpc.Channel or grpc.aio.Channel + """ + # TODO: Implement a special credentials type for emulator and use + # "transport.create_channel" to create gRPC channels once google-auth + # extends it's allowed credentials types. + # Note: this code also exists in the firestore client. + if "GrpcAsyncIOTransport" in str(transport.__name__): + return grpc.aio.secure_channel( + self._emulator_host, + self._local_composite_credentials(), + options=options, + ) + else: + return grpc.secure_channel( + self._emulator_host, + self._local_composite_credentials(), + options=options, + ) + + def _local_composite_credentials(self): + """Create credentials for the local emulator channel. + + :return: grpc.ChannelCredentials + """ + credentials = google.auth.credentials.with_scopes_if_required( + self._credentials, None + ) + request = google.auth.transport.requests.Request() + + # Create the metadata plugin for inserting the authorization header. + metadata_plugin = google.auth.transport.grpc.AuthMetadataPlugin( + credentials, request + ) + + # Create a set of grpc.CallCredentials using the metadata plugin. + google_auth_credentials = grpc.metadata_call_credentials(metadata_plugin) + + # Using the local_credentials to allow connection to emulator + local_credentials = grpc.local_channel_credentials() + + # Combine the local credentials and the authorization credentials. + return grpc.composite_channel_credentials( + local_credentials, google_auth_credentials + ) + + def _create_gapic_client_channel(self, client_class, grpc_transport): + if self._emulator_host is not None: + api_endpoint = self._emulator_host + elif self._client_options and self._client_options.api_endpoint: + api_endpoint = self._client_options.api_endpoint + else: + api_endpoint = client_class.DEFAULT_ENDPOINT + + if self._emulator_host is not None: + channel = self._emulator_channel( + transport=grpc_transport, + options=_GRPC_CHANNEL_OPTIONS, + ) + else: + channel = grpc_transport.create_channel( + host=api_endpoint, + credentials=self._credentials, + options=_GRPC_CHANNEL_OPTIONS, + ) + return grpc_transport(channel=channel, host=api_endpoint) + + @property + def project_path(self): + """Project name to be used with Instance Admin API. + + .. note:: + + This property will not change if ``project`` does not, but the + return value is not cached. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_project_path] + :end-before: [END bigtable_api_project_path] + :dedent: 4 + + The project name is of the form + + ``"projects/{project}"`` + + :rtype: str + :returns: Return a fully-qualified project string. + """ + return self.instance_admin_client.common_project_path(self.project) + + @property + def table_data_client(self): + """Getter for the gRPC stub used for the Table Admin API. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_table_data_client] + :end-before: [END bigtable_api_table_data_client] + :dedent: 4 + + :rtype: :class:`.bigtable_v2.BigtableClient` + :returns: A BigtableClient object. 
+ """ + if self._table_data_client is None: + transport = self._create_gapic_client_channel( + bigtable_v2.BigtableClient, + BigtableGrpcTransport, + ) + klass = _create_gapic_client( + bigtable_v2.BigtableClient, + client_options=self._client_options, + transport=transport, + ) + self._table_data_client = klass(self) + return self._table_data_client + + @property + def table_admin_client(self): + """Getter for the gRPC stub used for the Table Admin API. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_table_admin_client] + :end-before: [END bigtable_api_table_admin_client] + :dedent: 4 + + :rtype: :class:`.bigtable_admin_pb2.BigtableTableAdmin` + :returns: A BigtableTableAdmin instance. + :raises: :class:`ValueError ` if the current + client is not an admin client or if it has not been + :meth:`start`-ed. + """ + if self._table_admin_client is None: + if not self._admin: + raise ValueError("Client is not an admin client.") + + transport = self._create_gapic_client_channel( + bigtable_admin_v2.BigtableTableAdminClient, + BigtableTableAdminGrpcTransport, + ) + klass = _create_gapic_client( + bigtable_admin_v2.BigtableTableAdminClient, + client_options=self._admin_client_options, + transport=transport, + ) + self._table_admin_client = klass(self) + return self._table_admin_client + + @property + def instance_admin_client(self): + """Getter for the gRPC stub used for the Table Admin API. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_instance_admin_client] + :end-before: [END bigtable_api_instance_admin_client] + :dedent: 4 + + :rtype: :class:`.bigtable_admin_pb2.BigtableInstanceAdmin` + :returns: A BigtableInstanceAdmin instance. + :raises: :class:`ValueError ` if the current + client is not an admin client or if it has not been + :meth:`start`-ed. + """ + if self._instance_admin_client is None: + if not self._admin: + raise ValueError("Client is not an admin client.") + + transport = self._create_gapic_client_channel( + bigtable_admin_v2.BigtableInstanceAdminClient, + BigtableInstanceAdminGrpcTransport, + ) + klass = _create_gapic_client( + bigtable_admin_v2.BigtableInstanceAdminClient, + client_options=self._admin_client_options, + transport=transport, + ) + self._instance_admin_client = klass(self) + return self._instance_admin_client + + def instance(self, instance_id, display_name=None, instance_type=None, labels=None): + """Factory to create a instance associated with this client. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_create_prod_instance] + :end-before: [END bigtable_api_create_prod_instance] + :dedent: 4 + + :type instance_id: str + :param instance_id: The ID of the instance. + + :type display_name: str + :param display_name: (Optional) The display name for the instance in + the Cloud Console UI. (Must be between 4 and 30 + characters.) If this value is not set in the + constructor, will fall back to the instance ID. + + :type instance_type: int + :param instance_type: (Optional) The type of the instance. + Possible values are represented + by the following constants: + :data:`google.cloud.bigtable.instance.InstanceType.PRODUCTION`. + :data:`google.cloud.bigtable.instance.InstanceType.DEVELOPMENT`, + Defaults to + :data:`google.cloud.bigtable.instance.InstanceType.UNSPECIFIED`. 
+ + :type labels: dict + :param labels: (Optional) Labels are a flexible and lightweight + mechanism for organizing cloud resources into groups + that reflect a customer's organizational needs and + deployment strategies. They can be used to filter + resources and aggregate metrics. Label keys must be + between 1 and 63 characters long. Maximum 64 labels can + be associated with a given resource. Label values must + be between 0 and 63 characters long. Keys and values + must both be under 128 bytes. + + :rtype: :class:`~google.cloud.bigtable.instance.Instance` + :returns: an instance owned by this client. + """ + return Instance( + instance_id, + self, + display_name=display_name, + instance_type=instance_type, + labels=labels, + ) + + def list_instances(self): + """List instances owned by the project. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_list_instances] + :end-before: [END bigtable_api_list_instances] + :dedent: 4 + + :rtype: tuple + :returns: + (instances, failed_locations), where 'instances' is list of + :class:`google.cloud.bigtable.instance.Instance`, and + 'failed_locations' is a list of locations which could not + be resolved. + """ + resp = self.instance_admin_client.list_instances( + request={"parent": self.project_path} + ) + instances = [Instance.from_pb(instance, self) for instance in resp.instances] + return instances, resp.failed_locations + + def list_clusters(self): + """List the clusters in the project. + + For example: + + .. literalinclude:: snippets.py + :start-after: [START bigtable_api_list_clusters_in_project] + :end-before: [END bigtable_api_list_clusters_in_project] + :dedent: 4 + + :rtype: tuple + :returns: + (clusters, failed_locations), where 'clusters' is list of + :class:`google.cloud.bigtable.instance.Cluster`, and + 'failed_locations' is a list of strings representing + locations which could not be resolved. 
+ """ + resp = self.instance_admin_client.list_clusters( + request={ + "parent": self.instance_admin_client.instance_path(self.project, "-") + } + ) + clusters = [] + instances = {} + for cluster in resp.clusters: + match_cluster_name = _CLUSTER_NAME_RE.match(cluster.name) + instance_id = match_cluster_name.group("instance") + if instance_id not in instances: + instances[instance_id] = self.instance(instance_id) + clusters.append(Cluster.from_pb(cluster, instances[instance_id])) + return clusters, resp.failed_locations diff --git a/google/cloud/bigtable/cluster.py b/google/cloud/bigtable/deprecated/cluster.py similarity index 100% rename from google/cloud/bigtable/cluster.py rename to google/cloud/bigtable/deprecated/cluster.py diff --git a/google/cloud/bigtable/column_family.py b/google/cloud/bigtable/deprecated/column_family.py similarity index 100% rename from google/cloud/bigtable/column_family.py rename to google/cloud/bigtable/deprecated/column_family.py diff --git a/google/cloud/bigtable/encryption_info.py b/google/cloud/bigtable/deprecated/encryption_info.py similarity index 100% rename from google/cloud/bigtable/encryption_info.py rename to google/cloud/bigtable/deprecated/encryption_info.py diff --git a/google/cloud/bigtable/enums.py b/google/cloud/bigtable/deprecated/enums.py similarity index 100% rename from google/cloud/bigtable/enums.py rename to google/cloud/bigtable/deprecated/enums.py diff --git a/google/cloud/bigtable/error.py b/google/cloud/bigtable/deprecated/error.py similarity index 100% rename from google/cloud/bigtable/error.py rename to google/cloud/bigtable/deprecated/error.py diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/deprecated/gapic_version.py similarity index 100% rename from google/cloud/bigtable/gapic_version.py rename to google/cloud/bigtable/deprecated/gapic_version.py diff --git a/google/cloud/bigtable/instance.py b/google/cloud/bigtable/deprecated/instance.py similarity index 100% rename from google/cloud/bigtable/instance.py rename to google/cloud/bigtable/deprecated/instance.py diff --git a/google/cloud/bigtable/policy.py b/google/cloud/bigtable/deprecated/policy.py similarity index 100% rename from google/cloud/bigtable/policy.py rename to google/cloud/bigtable/deprecated/policy.py diff --git a/google/cloud/bigtable/py.typed b/google/cloud/bigtable/deprecated/py.typed similarity index 100% rename from google/cloud/bigtable/py.typed rename to google/cloud/bigtable/deprecated/py.typed diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/deprecated/row.py similarity index 100% rename from google/cloud/bigtable/row.py rename to google/cloud/bigtable/deprecated/row.py diff --git a/google/cloud/bigtable/row_data.py b/google/cloud/bigtable/deprecated/row_data.py similarity index 100% rename from google/cloud/bigtable/row_data.py rename to google/cloud/bigtable/deprecated/row_data.py diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/deprecated/row_filters.py similarity index 100% rename from google/cloud/bigtable/row_filters.py rename to google/cloud/bigtable/deprecated/row_filters.py diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/deprecated/row_merger.py similarity index 100% rename from google/cloud/bigtable/row_merger.py rename to google/cloud/bigtable/deprecated/row_merger.py diff --git a/google/cloud/bigtable/row_set.py b/google/cloud/bigtable/deprecated/row_set.py similarity index 100% rename from google/cloud/bigtable/row_set.py rename to 
google/cloud/bigtable/deprecated/row_set.py diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/deprecated/table.py similarity index 100% rename from google/cloud/bigtable/table.py rename to google/cloud/bigtable/deprecated/table.py From 393860c8329b83f1162a7ef3d8b903a5258bba05 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 10:41:33 -0800 Subject: [PATCH 002/349] added table api skeleton --- google/cloud/bigtable/__init__.py | 3 +- google/cloud/bigtable/client.py | 357 +++++++++++++++++++++++++++++- 2 files changed, 358 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 44b5092f4..d24e0606f 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -18,5 +18,6 @@ __version__ = package_version.__version__ from google.cloud.bigtable_async.client import BigtableDataClient +from google.cloud.bigtable_async.client import Table -__all__ = ("BigtableDataClient") +__all__ = ("BigtableDataClient", "Table") diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index ede268cb0..19edc4ff4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -35,7 +35,362 @@ def __init__( pass - def get_table(instance_id:str, table_id:str, app_profile_id:str|None=None) -> Table: + def get_table(self, instance_id:str, table_id:str, app_profile_id:str|None=None) -> Table: + return Table(self, instance_id, table_id, app_profile_id) + +class Table(): + """ + Main Data API surface + + Table object maintains instance_id, table_id, and app_profile_id context, and passes them with + each call + """ + + def __init__( + self, + client:BigtableDataClient, + instance_id:str, + table_id: str, + app_profile_id:str|None=None + ): + pass + + async def read_rows_stream( + self, + query: ReadRowsQuery|dict, + *, + shard:bool=False, + limit:int|None, + cache_size_limit:int|None=None, + operation_timeout:int|float|None=60, + per_row_timeout:int|float|None=10, + idle_timeout:int|float|None=300, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> AsyncIterable[RowResponse]: + """ + Returns a generator to asynchronously stream back row data. + + Failed requests within operation_timeout and operation_deadline policies will be retried. + + By default, row data is streamed eagerly over the network, and fully cached in memory + in the generator, which can be consumed as needed. The size of the generator cache can + be configured with cache_size_limit. When the cache is full, the read_rows_stream will pause + the network stream until space is available + + Args: + - query: contains details about which rows to return + - shard: if True, will attempt to split up and distribute query to multiple + backend nodes in parallel + - limit: a limit on the number of rows to return. Actual limit will be + min(limit, query.limit) + - cache_size: the number of rows to cache in memory. If None, no limits. + Defaults to None + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + time is only counted while actively waiting on the network. + Completed and cached results can still be accessed after the deadline is complete, + with a DeadlineExceeded exception only raised after cached results are exhausted + - per_row_timeout: the time budget for a single row read, in seconds. 
If a row takes + longer than per_row_timeout to complete, the ongoing network request will be cancelled with a + DeadlineExceeded exception, and a retry may be attempted + Applies only to the underlying network call. + - idle_timeout: the number of idle seconds before an active generator is marked as + stale and the cache is drained. The idle count is reset each time the generator + is yielded from + raises DeadlineExceeded on future yields + - per_request_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted + - metadata: Strings which should be sent along with the request as metadata headers. + + Returns: + - an asynchronous generator that yields rows returned by the query + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + from any retries that failed + - IdleTimeout: if generator was abandoned + """ + pass + + async def read_rows( + self, + query: ReadRowsQuery|dict, + *, + shard:bool=False, + limit:int|None, + operation_timeout:int|float|None=60, + per_row_timeout:int|float|None=10, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> List[RowResponse]: + """ + Helper function that returns a full list instead of a generator + + See read_rows_stream + + Returns: + - a list of the rows returned by the query + """ + pass + + async def read_row( + self, + row_key:str|bytes, + *, + operation_timeout:int|float|None=60, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> RowResponse: + """ + Helper function to return a single row + + See read_rows_stream + + Returns: + - the individual row requested + """ + pass + + async def read_rows_sharded( + self, + query_list: list[ReadRowsQuery]|list[dict], + *, + limit:int|None, + cache_size_limit:int|None=None, + operation_timeout:int|float|None=60, + per_row_timeout:int|float|None=10, + idle_timeout:int|float|None=300, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> AsyncIterable[RowResponse]: + """ + Runs a sharded query in parallel + + Each query in query_list will be run concurrently, with results yielded as they are ready + yielded results may be out of order + + Args: + - query_list: a list of queries to run in parallel + """ + pass + + async def row_exists( + self, + row_key:str|bytes, + *, + operation_timeout:int|float|None=60, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> bool: + """ + Helper function to determine if a row exists + + uses the filters: chain(limit cells per row = 1, strip value) + + Returns: + - a bool indicating whether the row exists + """ + pass + + + + async def sample_keys( + self, + *, + operation_timeout:int|float|None=60, + per_sample_timeout:int|float|None=10, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ) -> RowKeySamples: + """ + Return a set of RowKeySamples that delimit contiguous sections of the table of + approximately equal size + + RowKeySamples output can be used with ReadRowsQuery.shard() to create a sharded query that + can be parallelized across multiple backend nodes. read_rows and read_rows_stream + requests will call sample_keys internally for this purpose when sharding is enabled + + RowKeySamples is simply a type alias for
list[tuple[bytes, int]]; a list of + row_keys, along with offset positions in the table + + Returns: + - a set of RowKeySamples that delimit contiguous sections of the table + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing all GoogleAPIError + exceptions from any retries that failed + """ + pass + + def mutations_batcher(self, **kwargs) -> MutationsBatcher: + """ + Returns a new mutations batcher instance. + + Can be used to iteratively add mutations that are flushed as a group, + to avoid excess network calls + + Returns: + - a MutationsBatcher context manager that can batch requests + """ + return MutationsBatcher(self, **kwargs) + + async def mutate_row( + self, + row_key: str|bytes, + mutations: List[Mutation]|Mutation, + *, + operation_timeout:int|float|None=60, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ): + """ + Mutates a row atomically. + + Cells already present in the row are left unchanged unless explicitly changed + by ``mutation``. + + Idempotent operations (i.e., all mutations have an explicit timestamp) will be + retried on server failure. Non-idempotent operations will not. + + Args: + - row_key: the row to apply mutations to + - mutations: the set of mutations to apply to the row + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + time is only counted while actively waiting on the network. + DeadlineExceeded exception raised after timeout + - per_request_timeout: the time budget for an individual network request, + in seconds. If it takes longer than this time to complete, the request + will be cancelled with a DeadlineExceeded exception, and a retry will be + attempted if within operation_timeout budget + - metadata: Strings which should be sent along with the request as metadata headers. + + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing all + GoogleAPIError exceptions from any retries that failed + - GoogleAPIError: raised on non-idempotent operations that cannot be + safely retried. + """ + pass + + async def bulk_mutate_rows( + self, + mutation_entries: list[BulkMutationsEntry], + *, + operation_timeout:int|float|None=60, + per_request_timeout:int|float|None=None, + metadata: list[tuple[str, str]]|None = None, + ): + """ + Applies mutations for multiple rows in a single batched request. + + Each individual BulkMutationsEntry is applied atomically, but separate entries + may be applied in arbitrary order (even for entries targeting the same row). + In total, the row_mutations can contain at most 100000 individual mutations + across all entries + + Idempotent entries (i.e., entries with mutations with explicit timestamps) + will be retried on failure. Non-idempotent entries will not, and will be reported in a + raised exception group + + Args: + - mutation_entries: the batches of mutations to apply + Each entry will be applied atomically, but entries will be applied + in arbitrary order + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + time is only counted while actively waiting on the network. + DeadlineExceeded exception raised after timeout + - per_request_timeout: the time budget for an individual network request, + in seconds.
If it takes longer than this time to complete, the request + will be cancelled with a DeadlineExceeded exception, and a retry will + be attempted if within operation_timeout budget + - metadata: Strings which should be sent along with the request as metadata headers. + + Raises: + - MutationsExceptionGroup if one or more mutations fails + Contains details about any failed entries in .exceptions + """ + pass + + async def check_and_mutate_row( + self, + row_key: str|bytes, + predicate: RowFilter|None, + true_case_mutations: Mutation | list[Mutation] | None = None, + false_case_mutations: Mutation | list[Mutation] | None = None, + operation_timeout:int|float|None=60, + metadata: list[tuple[str, str]]|None = None, + ) -> bool: + """ + Mutates a row atomically based on the output of a predicate filter + + Non-idempotent operation: will not be retried + + Args: + - row_key: the key of the row to mutate + - predicate: the filter to be applied to the contents of the specified row. + Depending on whether or not any results are yielded, + either true_case_mutations or false_case_mutations will be executed. + If None, checks that the row contains any values at all. + - true_case_mutations: + Changes to be atomically applied to the specified row if + predicate yields at least one cell when + applied to row_key. Entries are applied in order, + meaning that earlier mutations can be masked by later + ones. Must contain at least one entry if + false_case_mutations is empty, and at most 100000. + - false_case_mutations: + Changes to be atomically applied to the specified row if + predicate_filter does not yield any cells when + applied to row_key. Entries are applied in order, + meaning that earlier mutations can be masked by later + ones. Must contain at least one entry if + `true_case_mutations is empty, and at most 100000. + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will not be retried. + - metadata: Strings which should be sent along with the request as metadata headers. + Returns: + - bool indicating whether the predicate was true or false + Raises: + - GoogleAPIError exceptions from grpc call + """ + pass + + async def read_modify_write_row( + self, + row_key: str|bytes, + rules: ReadModifyWriteRule|list[ReadModifyWriteRule]|dict|list[dict], + *, + operation_timeout:int|float|None=60, + metadata: list[tuple[str, str]]|None = None, + ) -> RowResponse: + """ + Reads and modifies a row atomically according to input ReadModifyWriteRules, + and returns the contents of all modified cells + + The new value for the timestamp is the greater of the existing timestamp or + the current server time. + + Non-idempotent operation: will not be retried + + Args: + - row_key: the key of the row to apply read/modify/write rules to + - rules: A rule or set of rules to apply to the row. + Rules are applied in order, meaning that earlier rules will affect the + results of later ones. + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will not be retried. + - metadata: Strings which should be sent along with the request as metadata headers. 
+ Returns: + - RowResponse: containing cell data that was modified as part of the + operation + Raises: + - GoogleAPIError exceptions from grpc call + """ pass if __name__ == "__main__": From 11b34935e99e00150dff423c71aafc9933310964 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 10:44:57 -0800 Subject: [PATCH 003/349] fixed missing files --- google/cloud/bigtable/__init__.py | 4 ++-- google/cloud/bigtable/gapic_version.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 google/cloud/bigtable/gapic_version.py diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index d24e0606f..b5a364c7f 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -17,7 +17,7 @@ __version__ = package_version.__version__ -from google.cloud.bigtable_async.client import BigtableDataClient -from google.cloud.bigtable_async.client import Table +from google.cloud.bigtable.client import BigtableDataClient +from google.cloud.bigtable.client import Table __all__ = ("BigtableDataClient", "Table") diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/gapic_version.py new file mode 100644 index 000000000..2788e5e55 --- /dev/null +++ b/google/cloud/bigtable/gapic_version.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +__version__ = "2.15.0" # {x-release-please-version} From 1ac0b7ad9ca0f337589921cd1c06f7d0ed5bd71e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 11:08:14 -0800 Subject: [PATCH 004/349] updated import paths --- google/cloud/bigtable/__init__.py | 4 +- google/cloud/bigtable/deprecated/__init__.py | 2 +- .../cloud/bigtable/deprecated/app_profile.py | 8 +-- google/cloud/bigtable/deprecated/backup.py | 20 +++--- google/cloud/bigtable/deprecated/batcher.py | 8 +-- google/cloud/bigtable/deprecated/client.py | 32 ++++----- google/cloud/bigtable/deprecated/cluster.py | 22 +++--- .../bigtable/deprecated/column_family.py | 2 +- .../bigtable/deprecated/encryption_info.py | 4 +- .../bigtable/deprecated/gapic_version.py | 16 ----- google/cloud/bigtable/deprecated/instance.py | 68 +++++++++---------- google/cloud/bigtable/deprecated/row.py | 18 ++--- google/cloud/bigtable/deprecated/row_data.py | 6 +- .../cloud/bigtable/deprecated/row_merger.py | 2 +- google/cloud/bigtable/deprecated/table.py | 64 ++++++++--------- 15 files changed, 130 insertions(+), 146 deletions(-) delete mode 100644 google/cloud/bigtable/deprecated/gapic_version.py diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index b5a364c7f..ff4b0cde2 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -15,9 +15,9 @@ # from google.cloud.bigtable import gapic_version as package_version -__version__ = package_version.__version__ - from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.client import Table +__version__: str = package_version.__version__ + __all__ = ("BigtableDataClient", "Table") diff --git a/google/cloud/bigtable/deprecated/__init__.py b/google/cloud/bigtable/deprecated/__init__.py index 7331ff241..a54fffdf1 100644 --- a/google/cloud/bigtable/deprecated/__init__.py +++ b/google/cloud/bigtable/deprecated/__init__.py @@ -14,7 +14,7 @@ """Google Cloud Bigtable API package.""" -from google.cloud.bigtable.client import Client +from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable import gapic_version as package_version diff --git a/google/cloud/bigtable/deprecated/app_profile.py b/google/cloud/bigtable/deprecated/app_profile.py index 8cde66146..a5c3df356 100644 --- a/google/cloud/bigtable/deprecated/app_profile.py +++ b/google/cloud/bigtable/deprecated/app_profile.py @@ -17,7 +17,7 @@ import re -from google.cloud.bigtable.enums import RoutingPolicyType +from google.cloud.bigtable.deprecated.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.types import instance from google.protobuf import field_mask_pb2 from google.api_core.exceptions import NotFound @@ -47,8 +47,8 @@ class AppProfile(object): :param: routing_policy_type: (Optional) The type of the routing policy. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.RoutingPolicyType.ANY` - :data:`google.cloud.bigtable.enums.RoutingPolicyType.SINGLE` + :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.ANY` + :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.SINGLE` :type: description: str :param: description: (Optional) Long form description of the use @@ -148,7 +148,7 @@ def from_pb(cls, app_profile_pb, instance): :type app_profile_pb: :class:`instance.app_profile_pb` :param app_profile_pb: An instance protobuf object. 
- :type instance: :class:`google.cloud.bigtable.instance.Instance` + :type instance: :class:`google.cloud.bigtable.deprecated.instance.Instance` :param instance: The instance that owns the cluster. :rtype: :class:`AppProfile` diff --git a/google/cloud/bigtable/deprecated/backup.py b/google/cloud/bigtable/deprecated/backup.py index 6986d730a..fc15318bc 100644 --- a/google/cloud/bigtable/deprecated/backup.py +++ b/google/cloud/bigtable/deprecated/backup.py @@ -19,8 +19,8 @@ from google.cloud._helpers import _datetime_to_pb_timestamp # type: ignore from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient from google.cloud.bigtable_admin_v2.types import table -from google.cloud.bigtable.encryption_info import EncryptionInfo -from google.cloud.bigtable.policy import Policy +from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo +from google.cloud.bigtable.deprecated.policy import Policy from google.cloud.exceptions import NotFound # type: ignore from google.protobuf import field_mask_pb2 @@ -50,7 +50,7 @@ class Backup(object): :type backup_id: str :param backup_id: The ID of the backup. - :type instance: :class:`~google.cloud.bigtable.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` :param instance: The Instance that owns this Backup. :type cluster_id: str @@ -188,7 +188,7 @@ def expire_time(self, new_expire_time): def encryption_info(self): """Encryption info for this Backup. - :rtype: :class:`google.cloud.bigtable.encryption.EncryptionInfo` + :rtype: :class:`google.cloud.bigtable.deprecated.encryption.EncryptionInfo` :returns: The encryption information for this backup. """ return self._encryption_info @@ -238,10 +238,10 @@ def from_pb(cls, backup_pb, instance): :type backup_pb: :class:`table.Backup` :param backup_pb: A Backup protobuf object. - :type instance: :class:`Instance ` + :type instance: :class:`Instance ` :param instance: The Instance that owns the Backup. - :rtype: :class:`~google.cloud.bigtable.backup.Backup` + :rtype: :class:`~google.cloud.bigtable.deprecated.backup.Backup` :returns: The backup parsed from the protobuf response. :raises: ValueError: If the backup name does not match the expected format or the parsed project ID does not match the @@ -440,7 +440,7 @@ def restore(self, table_id, instance_id=None): def get_iam_policy(self): """Gets the IAM access control policy for this backup. - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this backup. """ table_api = self._instance._client.table_admin_client @@ -452,13 +452,13 @@ def set_iam_policy(self, policy): existing policy. For more information about policy, please see documentation of - class `google.cloud.bigtable.policy.Policy` + class `google.cloud.bigtable.deprecated.policy.Policy` - :type policy: :class:`google.cloud.bigtable.policy.Policy` + :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this backup. - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this backup. 
""" table_api = self._instance._client.table_admin_client diff --git a/google/cloud/bigtable/deprecated/batcher.py b/google/cloud/bigtable/deprecated/batcher.py index 3c23f4436..58cf6b6e3 100644 --- a/google/cloud/bigtable/deprecated/batcher.py +++ b/google/cloud/bigtable/deprecated/batcher.py @@ -42,7 +42,7 @@ class MutationsBatcher(object): capability of asynchronous, parallel RPCs. :type table: class - :param table: class:`~google.cloud.bigtable.table.Table`. + :param table: class:`~google.cloud.bigtable.deprecated.table.Table`. :type flush_count: int :param flush_count: (Optional) Max number of rows to flush. If it @@ -76,7 +76,7 @@ def mutate(self, row): :dedent: 4 :type row: class - :param row: class:`~google.cloud.bigtable.row.DirectRow`. + :param row: class:`~google.cloud.bigtable.deprecated.row.DirectRow`. :raises: One of the following: * :exc:`~.table._BigtableRetryableError` if any @@ -115,8 +115,8 @@ def mutate_rows(self, rows): :end-before: [END bigtable_api_batcher_mutate_rows] :dedent: 4 - :type rows: list:[`~google.cloud.bigtable.row.DirectRow`] - :param rows: list:[`~google.cloud.bigtable.row.DirectRow`]. + :type rows: list:[`~google.cloud.bigtable.deprecated.row.DirectRow`] + :param rows: list:[`~google.cloud.bigtable.deprecated.row.DirectRow`]. :raises: One of the following: * :exc:`~.table._BigtableRetryableError` if any diff --git a/google/cloud/bigtable/deprecated/client.py b/google/cloud/bigtable/deprecated/client.py index c82a268c6..058055b60 100644 --- a/google/cloud/bigtable/deprecated/client.py +++ b/google/cloud/bigtable/deprecated/client.py @@ -18,14 +18,14 @@ In the hierarchy of API concepts -* a :class:`~google.cloud.bigtable.client.Client` owns an - :class:`~google.cloud.bigtable.instance.Instance` -* an :class:`~google.cloud.bigtable.instance.Instance` owns a - :class:`~google.cloud.bigtable.table.Table` -* a :class:`~google.cloud.bigtable.table.Table` owns a +* a :class:`~google.cloud.bigtable.deprecated.client.Client` owns an + :class:`~google.cloud.bigtable.deprecated.instance.Instance` +* an :class:`~google.cloud.bigtable.deprecated.instance.Instance` owns a + :class:`~google.cloud.bigtable.deprecated.table.Table` +* a :class:`~google.cloud.bigtable.deprecated.table.Table` owns a :class:`~.column_family.ColumnFamily` -* a :class:`~google.cloud.bigtable.table.Table` owns a - :class:`~google.cloud.bigtable.row.Row` (and all the cells in the row) +* a :class:`~google.cloud.bigtable.deprecated.table.Table` owns a + :class:`~google.cloud.bigtable.deprecated.row.Row` (and all the cells in the row) """ import os import warnings @@ -46,13 +46,13 @@ ) from google.cloud import bigtable -from google.cloud.bigtable.instance import Instance -from google.cloud.bigtable.cluster import Cluster +from google.cloud.bigtable.deprecated.instance import Instance +from google.cloud.bigtable.deprecated.cluster import Cluster from google.cloud.client import ClientWithProject # type: ignore from google.cloud.bigtable_admin_v2.types import instance -from google.cloud.bigtable.cluster import _CLUSTER_NAME_RE +from google.cloud.bigtable.deprecated.cluster import _CLUSTER_NAME_RE from google.cloud.environment_vars import BIGTABLE_EMULATOR # type: ignore @@ -430,10 +430,10 @@ def instance(self, instance_id, display_name=None, instance_type=None, labels=No :param instance_type: (Optional) The type of the instance. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.instance.InstanceType.PRODUCTION`. 
- :data:`google.cloud.bigtable.instance.InstanceType.DEVELOPMENT`, + :data:`google.cloud.bigtable.deprecated.instance.InstanceType.PRODUCTION`. + :data:`google.cloud.bigtable.deprecated.instance.InstanceType.DEVELOPMENT`, Defaults to - :data:`google.cloud.bigtable.instance.InstanceType.UNSPECIFIED`. + :data:`google.cloud.bigtable.deprecated.instance.InstanceType.UNSPECIFIED`. :type labels: dict :param labels: (Optional) Labels are a flexible and lightweight @@ -446,7 +446,7 @@ def instance(self, instance_id, display_name=None, instance_type=None, labels=No be between 0 and 63 characters long. Keys and values must both be under 128 bytes. - :rtype: :class:`~google.cloud.bigtable.instance.Instance` + :rtype: :class:`~google.cloud.bigtable.deprecated.instance.Instance` :returns: an instance owned by this client. """ return Instance( @@ -470,7 +470,7 @@ def list_instances(self): :rtype: tuple :returns: (instances, failed_locations), where 'instances' is list of - :class:`google.cloud.bigtable.instance.Instance`, and + :class:`google.cloud.bigtable.deprecated.instance.Instance`, and 'failed_locations' is a list of locations which could not be resolved. """ @@ -493,7 +493,7 @@ def list_clusters(self): :rtype: tuple :returns: (clusters, failed_locations), where 'clusters' is list of - :class:`google.cloud.bigtable.instance.Cluster`, and + :class:`google.cloud.bigtable.deprecated.instance.Cluster`, and 'failed_locations' is a list of strings representing locations which could not be resolved. """ diff --git a/google/cloud/bigtable/deprecated/cluster.py b/google/cloud/bigtable/deprecated/cluster.py index 11fb5492d..b60d3503c 100644 --- a/google/cloud/bigtable/deprecated/cluster.py +++ b/google/cloud/bigtable/deprecated/cluster.py @@ -42,7 +42,7 @@ class Cluster(object): :type cluster_id: str :param cluster_id: The ID of the cluster. - :type instance: :class:`~google.cloud.bigtable.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` :param instance: The instance where the cluster resides. :type location_id: str @@ -62,10 +62,10 @@ class Cluster(object): :param default_storage_type: (Optional) The type of storage Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. :type kms_key_name: str :param kms_key_name: (Optional, Creation Only) The name of the KMS customer managed @@ -84,11 +84,11 @@ class Cluster(object): :param _state: (`OutputOnly`) The current state of the cluster. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.Cluster.State.NOT_KNOWN`. - :data:`google.cloud.bigtable.enums.Cluster.State.READY`. - :data:`google.cloud.bigtable.enums.Cluster.State.CREATING`. - :data:`google.cloud.bigtable.enums.Cluster.State.RESIZING`. - :data:`google.cloud.bigtable.enums.Cluster.State.DISABLED`. + :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.NOT_KNOWN`. + :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.READY`. + :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.CREATING`. + :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.RESIZING`. 
+ :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.DISABLED`. :type min_serve_nodes: int :param min_serve_nodes: (Optional) The minimum number of nodes to be set in the cluster for autoscaling. @@ -150,7 +150,7 @@ def from_pb(cls, cluster_pb, instance): :type cluster_pb: :class:`instance.Cluster` :param cluster_pb: An instance protobuf object. - :type instance: :class:`google.cloud.bigtable.instance.Instance` + :type instance: :class:`google.cloud.bigtable.deprecated.instance.Instance` :param instance: The instance that owns the cluster. :rtype: :class:`Cluster` @@ -236,7 +236,7 @@ def name(self): @property def state(self): - """google.cloud.bigtable.enums.Cluster.State: state of cluster. + """google.cloud.bigtable.deprecated.enums.Cluster.State: state of cluster. For example: diff --git a/google/cloud/bigtable/deprecated/column_family.py b/google/cloud/bigtable/deprecated/column_family.py index 80232958d..3d4c1a642 100644 --- a/google/cloud/bigtable/deprecated/column_family.py +++ b/google/cloud/bigtable/deprecated/column_family.py @@ -195,7 +195,7 @@ class ColumnFamily(object): :param column_family_id: The ID of the column family. Must be of the form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the column family. :type gc_rule: :class:`GarbageCollectionRule` diff --git a/google/cloud/bigtable/deprecated/encryption_info.py b/google/cloud/bigtable/deprecated/encryption_info.py index 1757297bc..daa0d9232 100644 --- a/google/cloud/bigtable/deprecated/encryption_info.py +++ b/google/cloud/bigtable/deprecated/encryption_info.py @@ -14,7 +14,7 @@ """Class for encryption info for tables and backups.""" -from google.cloud.bigtable.error import Status +from google.cloud.bigtable.deprecated.error import Status class EncryptionInfo: @@ -27,7 +27,7 @@ class EncryptionInfo: :type encryption_type: int :param encryption_type: See :class:`enums.EncryptionInfo.EncryptionType` - :type encryption_status: google.cloud.bigtable.encryption.Status + :type encryption_status: google.cloud.bigtable.deprecated.encryption.Status :param encryption_status: The encryption status. :type kms_key_version: str diff --git a/google/cloud/bigtable/deprecated/gapic_version.py b/google/cloud/bigtable/deprecated/gapic_version.py deleted file mode 100644 index 2788e5e55..000000000 --- a/google/cloud/bigtable/deprecated/gapic_version.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -__version__ = "2.15.0" # {x-release-please-version} diff --git a/google/cloud/bigtable/deprecated/instance.py b/google/cloud/bigtable/deprecated/instance.py index 6d092cefd..33475d261 100644 --- a/google/cloud/bigtable/deprecated/instance.py +++ b/google/cloud/bigtable/deprecated/instance.py @@ -16,9 +16,9 @@ import re -from google.cloud.bigtable.app_profile import AppProfile -from google.cloud.bigtable.cluster import Cluster -from google.cloud.bigtable.table import Table +from google.cloud.bigtable.deprecated.app_profile import AppProfile +from google.cloud.bigtable.deprecated.cluster import Cluster +from google.cloud.bigtable.deprecated.table import Table from google.protobuf import field_mask_pb2 @@ -28,7 +28,7 @@ from google.api_core.exceptions import NotFound -from google.cloud.bigtable.policy import Policy +from google.cloud.bigtable.deprecated.policy import Policy import warnings @@ -61,7 +61,7 @@ class Instance(object): :type instance_id: str :param instance_id: The ID of the instance. - :type client: :class:`Client ` + :type client: :class:`Client ` :param client: The client that owns the instance. Provides authorization and a project ID. @@ -75,10 +75,10 @@ class Instance(object): :param instance_type: (Optional) The type of the instance. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.Instance.Type.PRODUCTION`. - :data:`google.cloud.bigtable.enums.Instance.Type.DEVELOPMENT`, + :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.PRODUCTION`. + :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.DEVELOPMENT`, Defaults to - :data:`google.cloud.bigtable.enums.Instance.Type.UNSPECIFIED`. + :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.UNSPECIFIED`. :type labels: dict :param labels: (Optional) Labels are a flexible and lightweight @@ -95,9 +95,9 @@ class Instance(object): :param _state: (`OutputOnly`) The current state of the instance. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.Instance.State.STATE_NOT_KNOWN`. - :data:`google.cloud.bigtable.enums.Instance.State.READY`. - :data:`google.cloud.bigtable.enums.Instance.State.CREATING`. + :data:`google.cloud.bigtable.deprecated.enums.Instance.State.STATE_NOT_KNOWN`. + :data:`google.cloud.bigtable.deprecated.enums.Instance.State.READY`. + :data:`google.cloud.bigtable.deprecated.enums.Instance.State.CREATING`. """ def __init__( @@ -141,7 +141,7 @@ def from_pb(cls, instance_pb, client): :type instance_pb: :class:`instance.Instance` :param instance_pb: An instance protobuf object. - :type client: :class:`Client ` + :type client: :class:`Client ` :param client: The client that owns the instance. :rtype: :class:`Instance` @@ -196,7 +196,7 @@ def name(self): @property def state(self): - """google.cloud.bigtable.enums.Instance.State: state of Instance. + """google.cloud.bigtable.deprecated.enums.Instance.State: state of Instance. For example: @@ -272,12 +272,12 @@ def create( persisting Bigtable data. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. 
- :type clusters: class:`~[~google.cloud.bigtable.cluster.Cluster]` + :type clusters: class:`~[~google.cloud.bigtable.deprecated.cluster.Cluster]` :param clusters: List of clusters to be created. :rtype: :class:`~google.api_core.operation.Operation` @@ -478,7 +478,7 @@ def get_iam_policy(self, requested_policy_version=None): than the one that was requested, based on the feature syntax in the policy fetched. - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this instance """ args = {"resource": self.name} @@ -497,7 +497,7 @@ def set_iam_policy(self, policy): existing policy. For more information about policy, please see documentation of - class `google.cloud.bigtable.policy.Policy` + class `google.cloud.bigtable.deprecated.policy.Policy` For example: @@ -506,11 +506,11 @@ class `google.cloud.bigtable.policy.Policy` :end-before: [END bigtable_api_set_iam_policy] :dedent: 4 - :type policy: :class:`google.cloud.bigtable.policy.Policy` + :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this instance - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this instance. """ instance_admin_client = self._client.instance_admin_client @@ -586,12 +586,12 @@ def cluster( :param default_storage_type: (Optional) The type of storage Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. - :rtype: :class:`~google.cloud.bigtable.instance.Cluster` + :rtype: :class:`~google.cloud.bigtable.deprecated.instance.Cluster` :returns: a cluster owned by this instance. :type kms_key_name: str @@ -635,7 +635,7 @@ def list_clusters(self): :rtype: tuple :returns: (clusters, failed_locations), where 'clusters' is list of - :class:`google.cloud.bigtable.instance.Cluster`, and + :class:`google.cloud.bigtable.deprecated.instance.Cluster`, and 'failed_locations' is a list of locations which could not be resolved. """ @@ -664,7 +664,7 @@ def table(self, table_id, mutation_timeout=None, app_profile_id=None): :type app_profile_id: str :param app_profile_id: (Optional) The unique name of the AppProfile. - :rtype: :class:`Table ` + :rtype: :class:`Table ` :returns: The table owned by this instance. """ return Table( @@ -684,7 +684,7 @@ def list_tables(self): :end-before: [END bigtable_api_list_tables] :dedent: 4 - :rtype: list of :class:`Table ` + :rtype: list of :class:`Table ` :returns: The list of tables owned by the instance. :raises: :class:`ValueError ` if one of the returned tables has a name that is not of the expected format. @@ -731,8 +731,8 @@ def app_profile( :param: routing_policy_type: The type of the routing policy. 
Possible values are represented by the following constants: - :data:`google.cloud.bigtable.enums.RoutingPolicyType.ANY` - :data:`google.cloud.bigtable.enums.RoutingPolicyType.SINGLE` + :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.ANY` + :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.SINGLE` :type: description: str :param: description: (Optional) Long form description of the use @@ -753,7 +753,7 @@ def app_profile( transactional writes for ROUTING_POLICY_TYPE_SINGLE. - :rtype: :class:`~google.cloud.bigtable.app_profile.AppProfile>` + :rtype: :class:`~google.cloud.bigtable.deprecated.app_profile.AppProfile>` :returns: AppProfile for this instance. """ return AppProfile( @@ -776,10 +776,10 @@ def list_app_profiles(self): :end-before: [END bigtable_api_list_app_profiles] :dedent: 4 - :rtype: :list:[`~google.cloud.bigtable.app_profile.AppProfile`] - :returns: A :list:[`~google.cloud.bigtable.app_profile.AppProfile`]. + :rtype: :list:[`~google.cloud.bigtable.deprecated.app_profile.AppProfile`] + :returns: A :list:[`~google.cloud.bigtable.deprecated.app_profile.AppProfile`]. By default, this is a list of - :class:`~google.cloud.bigtable.app_profile.AppProfile` + :class:`~google.cloud.bigtable.deprecated.app_profile.AppProfile` instances. """ resp = self._client.instance_admin_client.list_app_profiles( diff --git a/google/cloud/bigtable/deprecated/row.py b/google/cloud/bigtable/deprecated/row.py index 752458a08..3b114a74a 100644 --- a/google/cloud/bigtable/deprecated/row.py +++ b/google/cloud/bigtable/deprecated/row.py @@ -51,7 +51,7 @@ class Row(object): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: (Optional) The table that owns the row. """ @@ -86,7 +86,7 @@ def table(self): :end-before: [END bigtable_api_row_table] :dedent: 4 - :rtype: table: :class:`Table ` + :rtype: table: :class:`Table ` :returns: table: The table that owns the row. """ return self._table @@ -105,7 +105,7 @@ class _SetDeleteRow(Row): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. """ @@ -275,11 +275,11 @@ class DirectRow(_SetDeleteRow): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: (Optional) The table that owns the row. This is used for the :meth: `commit` only. Alternatively, DirectRows can be persisted via - :meth:`~google.cloud.bigtable.table.Table.mutate_rows`. + :meth:`~google.cloud.bigtable.deprecated.table.Table.mutate_rows`. """ def __init__(self, row_key, table=None): @@ -519,7 +519,7 @@ class ConditionalRow(_SetDeleteRow): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. :type filter_: :class:`.RowFilter` @@ -791,7 +791,7 @@ class AppendRow(Row): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. """ @@ -1107,7 +1107,7 @@ def find_cells(self, column_family_id, column): are located. Returns: - List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the + List[~google.cloud.bigtable.deprecated.row_data.Cell]: The cells stored in the specified column. 
Raises: @@ -1147,7 +1147,7 @@ def cell_value(self, column_family_id, column, index=0): not specified, will return the first cell. Returns: - ~google.cloud.bigtable.row_data.Cell value: The cell value stored + ~google.cloud.bigtable.deprecated.row_data.Cell value: The cell value stored in the specified column and specified index. Raises: diff --git a/google/cloud/bigtable/deprecated/row_data.py b/google/cloud/bigtable/deprecated/row_data.py index a50fab1ee..9daa1ed8f 100644 --- a/google/cloud/bigtable/deprecated/row_data.py +++ b/google/cloud/bigtable/deprecated/row_data.py @@ -23,10 +23,10 @@ from google.api_core import retry from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable.row_merger import _RowMerger, _State +from google.cloud.bigtable.deprecated.row_merger import _RowMerger, _State from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 from google.cloud.bigtable_v2.types import data as data_v2_pb2 -from google.cloud.bigtable.row import Cell, InvalidChunk, PartialRowData +from google.cloud.bigtable.deprecated.row import Cell, InvalidChunk, PartialRowData # Some classes need to be re-exported here to keep backwards @@ -98,7 +98,7 @@ def _retry_read_rows_exception(exc): """The default retry strategy to be used on retry-able errors. Used by -:meth:`~google.cloud.bigtable.row_data.PartialRowsData._read_next_response`. +:meth:`~google.cloud.bigtable.deprecated.row_data.PartialRowsData._read_next_response`. """ diff --git a/google/cloud/bigtable/deprecated/row_merger.py b/google/cloud/bigtable/deprecated/row_merger.py index 515b91df7..d29d64eb2 100644 --- a/google/cloud/bigtable/deprecated/row_merger.py +++ b/google/cloud/bigtable/deprecated/row_merger.py @@ -1,6 +1,6 @@ from enum import Enum from collections import OrderedDict -from google.cloud.bigtable.row import Cell, PartialRowData, InvalidChunk +from google.cloud.bigtable.deprecated.row import Cell, PartialRowData, InvalidChunk _MISSING_COLUMN_FAMILY = "Column family {} is not among the cells stored in this row." 
_MISSING_COLUMN = ( diff --git a/google/cloud/bigtable/deprecated/table.py b/google/cloud/bigtable/deprecated/table.py index 8605992ba..cf60b066e 100644 --- a/google/cloud/bigtable/deprecated/table.py +++ b/google/cloud/bigtable/deprecated/table.py @@ -28,24 +28,24 @@ from google.api_core.retry import if_exception_type from google.api_core.retry import Retry from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable.backup import Backup -from google.cloud.bigtable.column_family import _gc_rule_from_pb -from google.cloud.bigtable.column_family import ColumnFamily -from google.cloud.bigtable.batcher import MutationsBatcher -from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_ROW_BYTES -from google.cloud.bigtable.encryption_info import EncryptionInfo -from google.cloud.bigtable.policy import Policy -from google.cloud.bigtable.row import AppendRow -from google.cloud.bigtable.row import ConditionalRow -from google.cloud.bigtable.row import DirectRow -from google.cloud.bigtable.row_data import ( +from google.cloud.bigtable.deprecated.backup import Backup +from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb +from google.cloud.bigtable.deprecated.column_family import ColumnFamily +from google.cloud.bigtable.deprecated.batcher import MutationsBatcher +from google.cloud.bigtable.deprecated.batcher import FLUSH_COUNT, MAX_ROW_BYTES +from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo +from google.cloud.bigtable.deprecated.policy import Policy +from google.cloud.bigtable.deprecated.row import AppendRow +from google.cloud.bigtable.deprecated.row import ConditionalRow +from google.cloud.bigtable.deprecated.row import DirectRow +from google.cloud.bigtable.deprecated.row_data import ( PartialRowsData, _retriable_internal_server_error, ) -from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS -from google.cloud.bigtable.row_set import RowSet -from google.cloud.bigtable.row_set import RowRange -from google.cloud.bigtable import enums +from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS +from google.cloud.bigtable.deprecated.row_set import RowSet +from google.cloud.bigtable.deprecated.row_set import RowRange +from google.cloud.bigtable.deprecated import enums from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient from google.cloud.bigtable_admin_v2.types import table as admin_messages_v2_pb2 @@ -88,7 +88,7 @@ class _BigtableRetryableError(Exception): ) """The default retry strategy to be used on retry-able errors. -Used by :meth:`~google.cloud.bigtable.table.Table.mutate_rows`. +Used by :meth:`~google.cloud.bigtable.deprecated.table.Table.mutate_rows`. """ @@ -119,7 +119,7 @@ class Table(object): :type table_id: str :param table_id: The ID of the table. - :type instance: :class:`~google.cloud.bigtable.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` :param instance: The instance that owns the table. :type app_profile_id: str @@ -172,7 +172,7 @@ def get_iam_policy(self): :end-before: [END bigtable_api_table_get_iam_policy] :dedent: 4 - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this table. """ table_client = self._instance._client.table_admin_client @@ -184,7 +184,7 @@ def set_iam_policy(self, policy): existing policy. 
For more information about policy, please see documentation of - class `google.cloud.bigtable.policy.Policy` + class `google.cloud.bigtable.deprecated.policy.Policy` For example: @@ -193,11 +193,11 @@ class `google.cloud.bigtable.policy.Policy` :end-before: [END bigtable_api_table_set_iam_policy] :dedent: 4 - :type policy: :class:`google.cloud.bigtable.policy.Policy` + :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this table. - :rtype: :class:`google.cloud.bigtable.policy.Policy` + :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` :returns: The current IAM policy of this table. """ table_client = self._instance._client.table_admin_client @@ -271,7 +271,7 @@ def row(self, row_key, filter_=None, append=False): .. warning:: At most one of ``filter_`` and ``append`` can be used in a - :class:`~google.cloud.bigtable.row.Row`. + :class:`~google.cloud.bigtable.deprecated.row.Row`. :type row_key: bytes :param row_key: The key for the row being created. @@ -284,7 +284,7 @@ def row(self, row_key, filter_=None, append=False): :param append: (Optional) Flag to determine if the row should be used for append mutations. - :rtype: :class:`~google.cloud.bigtable.row.Row` + :rtype: :class:`~google.cloud.bigtable.deprecated.row.Row` :returns: A row owned by this table. :raises: :class:`ValueError ` if both ``filter_`` and ``append`` are used. @@ -307,7 +307,7 @@ def row(self, row_key, filter_=None, append=False): return DirectRow(row_key, self) def append_row(self, row_key): - """Create a :class:`~google.cloud.bigtable.row.AppendRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.deprecated.row.AppendRow` associated with this table. For example: @@ -325,7 +325,7 @@ def append_row(self, row_key): return AppendRow(row_key, self) def direct_row(self, row_key): - """Create a :class:`~google.cloud.bigtable.row.DirectRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.deprecated.row.DirectRow` associated with this table. For example: @@ -343,7 +343,7 @@ def direct_row(self, row_key): return DirectRow(row_key, self) def conditional_row(self, row_key, filter_): - """Create a :class:`~google.cloud.bigtable.row.ConditionalRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.deprecated.row.ConditionalRow` associated with this table. For example: @@ -515,7 +515,7 @@ def get_encryption_info(self): :rtype: dict :returns: Dictionary of encryption info for this table. Keys are cluster ids and - values are tuples of :class:`google.cloud.bigtable.encryption.EncryptionInfo` instances. + values are tuples of :class:`google.cloud.bigtable.deprecated.encryption.EncryptionInfo` instances. """ ENCRYPTION_VIEW = enums.Table.View.ENCRYPTION_VIEW table_client = self._instance._client.table_admin_client @@ -967,7 +967,7 @@ def list_backups(self, cluster_id=None, filter_=None, order_by=None, page_size=0 number of resources in a page. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigtable.backup.Backup` + :returns: Iterator of :class:`~google.cloud.bigtable.deprecated.backup.Backup` resources within the current Instance. :raises: :class:`ValueError ` if one of the returned Backups' name is not of the expected format. @@ -1367,8 +1367,8 @@ def _check_row_table_name(table_name, row): :type table_name: str :param table_name: The name of the table. 
- :type row: :class:`~google.cloud.bigtable.row.Row` - :param row: An instance of :class:`~google.cloud.bigtable.row.Row` + :type row: :class:`~google.cloud.bigtable.deprecated.row.Row` + :param row: An instance of :class:`~google.cloud.bigtable.deprecated.row.Row` subclasses. :raises: :exc:`~.table.TableMismatchError` if the row does not belong to @@ -1384,8 +1384,8 @@ def _check_row_table_name(table_name, row): def _check_row_type(row): """Checks that a row is an instance of :class:`.DirectRow`. - :type row: :class:`~google.cloud.bigtable.row.Row` - :param row: An instance of :class:`~google.cloud.bigtable.row.Row` + :type row: :class:`~google.cloud.bigtable.deprecated.row.Row` + :param row: An instance of :class:`~google.cloud.bigtable.deprecated.row.Row` subclasses. :raises: :class:`TypeError ` if the row is not an From cf981d81b190e4e511d8529df4e1a983cc2647ff Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 11:09:13 -0800 Subject: [PATCH 005/349] updated unit tests --- tests/unit/__init__.py | 2 +- tests/unit/v2_client/__init__.py | 15 ++ tests/unit/{ => v2_client}/_testing.py | 0 .../read-rows-acceptance-test.json | 0 .../unit/{ => v2_client}/test_app_profile.py | 44 ++-- tests/unit/{ => v2_client}/test_backup.py | 36 +-- tests/unit/{ => v2_client}/test_batcher.py | 10 +- tests/unit/{ => v2_client}/test_client.py | 42 ++-- tests/unit/{ => v2_client}/test_cluster.py | 68 +++--- .../{ => v2_client}/test_column_family.py | 68 +++--- .../{ => v2_client}/test_encryption_info.py | 8 +- tests/unit/{ => v2_client}/test_error.py | 2 +- tests/unit/{ => v2_client}/test_instance.py | 50 ++-- tests/unit/{ => v2_client}/test_policy.py | 28 +-- tests/unit/{ => v2_client}/test_row.py | 34 +-- tests/unit/{ => v2_client}/test_row_data.py | 58 ++--- .../unit/{ => v2_client}/test_row_filters.py | 214 +++++++++--------- tests/unit/{ => v2_client}/test_row_merger.py | 4 +- tests/unit/{ => v2_client}/test_row_set.py | 60 ++--- tests/unit/{ => v2_client}/test_table.py | 172 +++++++------- 20 files changed, 465 insertions(+), 450 deletions(-) create mode 100644 tests/unit/v2_client/__init__.py rename tests/unit/{ => v2_client}/_testing.py (100%) rename tests/unit/{ => v2_client}/read-rows-acceptance-test.json (100%) rename tests/unit/{ => v2_client}/test_app_profile.py (94%) rename tests/unit/{ => v2_client}/test_backup.py (96%) rename tests/unit/{ => v2_client}/test_batcher.py (91%) rename tests/unit/{ => v2_client}/test_client.py (94%) rename tests/unit/{ => v2_client}/test_cluster.py (94%) rename tests/unit/{ => v2_client}/test_column_family.py (87%) rename tests/unit/{ => v2_client}/test_encryption_info.py (94%) rename tests/unit/{ => v2_client}/test_error.py (97%) rename tests/unit/{ => v2_client}/test_instance.py (95%) rename tests/unit/{ => v2_client}/test_policy.py (89%) rename tests/unit/{ => v2_client}/test_row.py (95%) rename tests/unit/{ => v2_client}/test_row_data.py (94%) rename tests/unit/{ => v2_client}/test_row_filters.py (77%) rename tests/unit/{ => v2_client}/test_row_merger.py (97%) rename tests/unit/{ => v2_client}/test_row_set.py (79%) rename tests/unit/{ => v2_client}/test_table.py (91%) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index e8e1c3845..89a37dc92 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
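Editor's note: stepping back from the individual hunks, the split performed by this series can be summarized in import form. This is a hedged summary, not code from the patches; it uses only module and class names that appear in the diffs above, and the new data surface is still a stub at this point in the history.

# Legacy (v2) surface: same classes, now under the deprecated package.
from google.cloud.bigtable.deprecated import Client
from google.cloud.bigtable.deprecated.batcher import MutationsBatcher

# New data surface re-exported at the package top level by this series.
from google.cloud.bigtable import BigtableDataClient, Table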
diff --git a/tests/unit/v2_client/__init__.py b/tests/unit/v2_client/__init__.py new file mode 100644 index 000000000..e8e1c3845 --- /dev/null +++ b/tests/unit/v2_client/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/unit/_testing.py b/tests/unit/v2_client/_testing.py similarity index 100% rename from tests/unit/_testing.py rename to tests/unit/v2_client/_testing.py diff --git a/tests/unit/read-rows-acceptance-test.json b/tests/unit/v2_client/read-rows-acceptance-test.json similarity index 100% rename from tests/unit/read-rows-acceptance-test.json rename to tests/unit/v2_client/read-rows-acceptance-test.json diff --git a/tests/unit/test_app_profile.py b/tests/unit/v2_client/test_app_profile.py similarity index 94% rename from tests/unit/test_app_profile.py rename to tests/unit/v2_client/test_app_profile.py index 660ee7899..575f25194 100644 --- a/tests/unit/test_app_profile.py +++ b/tests/unit/v2_client/test_app_profile.py @@ -32,19 +32,19 @@ def _make_app_profile(*args, **kwargs): - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile return AppProfile(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) def test_app_profile_constructor_defaults(): - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -60,7 +60,7 @@ def test_app_profile_constructor_defaults(): def test_app_profile_constructor_explicit(): - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType ANY = RoutingPolicyType.ANY DESCRIPTION_1 = "routing policy any" @@ -99,7 +99,7 @@ def test_app_profile_constructor_explicit(): def test_app_profile_constructor_multi_cluster_ids(): - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType ANY = RoutingPolicyType.ANY DESCRIPTION_1 = "routing policy any" @@ -166,8 +166,8 @@ def test_app_profile___ne__(): def test_app_profile_from_pb_success_w_routing_any(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -195,8 +195,8 @@ def test_app_profile_from_pb_success_w_routing_any(): def test_app_profile_from_pb_success_w_routing_any_multi_cluster_ids(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from 
google.cloud.bigtable.app_profile import AppProfile - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -226,8 +226,8 @@ def test_app_profile_from_pb_success_w_routing_any_multi_cluster_ids(): def test_app_profile_from_pb_success_w_routing_single(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -259,7 +259,7 @@ def test_app_profile_from_pb_success_w_routing_single(): def test_app_profile_from_pb_w_bad_app_profile_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile bad_app_profile_name = "BAD_NAME" @@ -271,7 +271,7 @@ def test_app_profile_from_pb_w_bad_app_profile_name(): def test_app_profile_from_pb_w_instance_id_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile ALT_INSTANCE_ID = "ALT_INSTANCE_ID" client = _Client(PROJECT) @@ -286,7 +286,7 @@ def test_app_profile_from_pb_w_instance_id_mistmatch(): def test_app_profile_from_pb_w_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile ALT_PROJECT = "ALT_PROJECT" client = _Client(project=ALT_PROJECT) @@ -304,7 +304,7 @@ def test_app_profile_reload_w_routing_any(): BigtableInstanceAdminClient, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType api = mock.create_autospec(BigtableInstanceAdminClient) credentials = _make_credentials() @@ -400,8 +400,8 @@ def test_app_profile_create_w_routing_any(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.app_profile import AppProfile - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -461,8 +461,8 @@ def test_app_profile_create_w_routing_single(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.app_profile import AppProfile - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -533,7 +533,7 @@ def 
test_app_profile_update_w_routing_any(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -608,7 +608,7 @@ def test_app_profile_update_w_routing_any_multi_cluster_ids(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -684,7 +684,7 @@ def test_app_profile_update_w_routing_single(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) diff --git a/tests/unit/test_backup.py b/tests/unit/v2_client/test_backup.py similarity index 96% rename from tests/unit/test_backup.py rename to tests/unit/v2_client/test_backup.py index 9882ca339..34cc8823a 100644 --- a/tests/unit/test_backup.py +++ b/tests/unit/v2_client/test_backup.py @@ -48,7 +48,7 @@ def _make_table_admin_client(): def _make_backup(*args, **kwargs): - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup return Backup(*args, **kwargs) @@ -102,7 +102,7 @@ def test_backup_constructor_explicit(): def test_backup_from_pb_w_project_mismatch(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup alt_project_id = "alt-project-id" client = _Client(project=alt_project_id) @@ -115,7 +115,7 @@ def test_backup_from_pb_w_project_mismatch(): def test_backup_from_pb_w_instance_mismatch(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup alt_instance = "/projects/%s/instances/alt-instance" % PROJECT_ID client = _Client() @@ -128,7 +128,7 @@ def test_backup_from_pb_w_instance_mismatch(): def test_backup_from_pb_w_bad_name(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup client = _Client() instance = _Instance(INSTANCE_NAME, client) @@ -139,10 +139,10 @@ def test_backup_from_pb_w_bad_name(): def test_backup_from_pb_success(): - from google.cloud.bigtable.encryption_info import EncryptionInfo - from google.cloud.bigtable.error import Status + from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo + from google.cloud.bigtable.deprecated.error import Status from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup from google.cloud._helpers import _datetime_to_pb_timestamp from google.rpc.code_pb2 import Code @@ -190,7 +190,7 @@ def test_backup_from_pb_success(): def test_backup_name(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client 
import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -225,7 +225,7 @@ def test_backup_parent_none(): def test_backup_parent_w_cluster(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -242,7 +242,7 @@ def test_backup_parent_w_cluster(): def test_backup_source_table_none(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -258,7 +258,7 @@ def test_backup_source_table_none(): def test_backup_source_table_valid(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -473,7 +473,7 @@ def test_backup_create_w_expire_time_not_set(): def test_backup_create_success(): from google.cloud._helpers import _datetime_to_pb_timestamp from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client op_future = object() credentials = _make_credentials() @@ -806,12 +806,12 @@ def test_backup_restore_to_another_instance(): def test_backup_get_iam_policy(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = Client(project=PROJECT_ID, credentials=credentials, admin=True) @@ -842,13 +842,13 @@ def test_backup_get_iam_policy(): def test_backup_set_iam_policy(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = Client(project=PROJECT_ID, credentials=credentials, admin=True) @@ -887,7 +887,7 @@ def test_backup_set_iam_policy(): def test_backup_test_iam_permissions(): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) diff --git a/tests/unit/test_batcher.py b/tests/unit/v2_client/test_batcher.py similarity index 91% rename from tests/unit/test_batcher.py rename to tests/unit/v2_client/test_batcher.py index 9ae6ed175..0793ed480 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/v2_client/test_batcher.py @@ -16,14 +16,14 @@ import mock import pytest -from google.cloud.bigtable.row import DirectRow +from google.cloud.bigtable.deprecated.row import DirectRow TABLE_ID = "table-id" TABLE_NAME = "/tables/" + TABLE_ID def 
_make_mutation_batcher(table, **kw): - from google.cloud.bigtable.batcher import MutationsBatcher + from google.cloud.bigtable.deprecated.batcher import MutationsBatcher return MutationsBatcher(table, **kw) @@ -92,9 +92,9 @@ def test_mutation_batcher_mutate_w_max_flush_count(): assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) +@mock.patch("google.cloud.bigtable.deprecated.batcher.MAX_MUTATIONS", new=3) def test_mutation_batcher_mutate_with_max_mutations_failure(): - from google.cloud.bigtable.batcher import MaxMutationsError + from google.cloud.bigtable.deprecated.batcher import MaxMutationsError table = _Table(TABLE_NAME) mutation_batcher = _make_mutation_batcher(table=table) @@ -109,7 +109,7 @@ def test_mutation_batcher_mutate_with_max_mutations_failure(): mutation_batcher.mutate(row) -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) +@mock.patch("google.cloud.bigtable.deprecated.batcher.MAX_MUTATIONS", new=3) def test_mutation_batcher_mutate_w_max_mutations(): table = _Table(TABLE_NAME) mutation_batcher = _make_mutation_batcher(table=table) diff --git a/tests/unit/test_client.py b/tests/unit/v2_client/test_client.py similarity index 94% rename from tests/unit/test_client.py rename to tests/unit/v2_client/test_client.py index 5944c58a3..82e70f8a6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/v2_client/test_client.py @@ -25,7 +25,7 @@ def _invoke_client_factory(client_class, **kw): - from google.cloud.bigtable.client import _create_gapic_client + from google.cloud.bigtable.deprecated.client import _create_gapic_client return _create_gapic_client(client_class, **kw) @@ -101,7 +101,7 @@ def __init__(self, credentials, emulator_host=None, emulator_channel=None): def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) @@ -109,8 +109,8 @@ def _make_client(*args, **kwargs): @mock.patch("os.environ", {}) def test_client_constructor_defaults(): from google.api_core import client_info - from google.cloud.bigtable import __version__ - from google.cloud.bigtable.client import DATA_SCOPE + from google.cloud.bigtable.deprecated import __version__ + from google.cloud.bigtable.deprecated.client import DATA_SCOPE credentials = _make_credentials() @@ -131,8 +131,8 @@ def test_client_constructor_defaults(): def test_client_constructor_explicit(): import warnings - from google.cloud.bigtable.client import ADMIN_SCOPE - from google.cloud.bigtable.client import DATA_SCOPE + from google.cloud.bigtable.deprecated.client import ADMIN_SCOPE + from google.cloud.bigtable.deprecated.client import DATA_SCOPE credentials = _make_credentials() client_info = mock.Mock() @@ -171,8 +171,8 @@ def test_client_constructor_w_both_admin_and_read_only(): def test_client_constructor_w_emulator_host(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT - from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.deprecated.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" with mock.patch("os.environ", {BIGTABLE_EMULATOR: emulator_host}): @@ -195,7 +195,7 @@ def test_client_constructor_w_emulator_host(): def test_client_constructor_w_emulator_host_w_project(): from google.cloud.environment_vars import 
BIGTABLE_EMULATOR - from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" with mock.patch("os.environ", {BIGTABLE_EMULATOR: emulator_host}): @@ -216,8 +216,8 @@ def test_client_constructor_w_emulator_host_w_project(): def test_client_constructor_w_emulator_host_w_credentials(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT - from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.deprecated.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" credentials = _make_credentials() @@ -238,15 +238,15 @@ def test_client_constructor_w_emulator_host_w_credentials(): def test_client__get_scopes_default(): - from google.cloud.bigtable.client import DATA_SCOPE + from google.cloud.bigtable.deprecated.client import DATA_SCOPE client = _make_client(project=PROJECT, credentials=_make_credentials()) assert client._get_scopes() == (DATA_SCOPE,) def test_client__get_scopes_w_admin(): - from google.cloud.bigtable.client import ADMIN_SCOPE - from google.cloud.bigtable.client import DATA_SCOPE + from google.cloud.bigtable.deprecated.client import ADMIN_SCOPE + from google.cloud.bigtable.deprecated.client import DATA_SCOPE client = _make_client(project=PROJECT, credentials=_make_credentials(), admin=True) expected_scopes = (DATA_SCOPE, ADMIN_SCOPE) @@ -254,7 +254,7 @@ def test_client__get_scopes_w_admin(): def test_client__get_scopes_w_read_only(): - from google.cloud.bigtable.client import READ_ONLY_SCOPE + from google.cloud.bigtable.deprecated.client import READ_ONLY_SCOPE client = _make_client( project=PROJECT, credentials=_make_credentials(), read_only=True @@ -344,7 +344,7 @@ def test_client__local_composite_credentials(): def _create_gapic_client_channel_helper(endpoint=None, emulator_host=None): - from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS client_class = mock.Mock(spec=["DEFAULT_ENDPOINT"]) credentials = _make_credentials() @@ -618,7 +618,7 @@ def test_client_instance_admin_client_initialized(): def test_client_instance_factory_defaults(): - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials) @@ -634,8 +634,8 @@ def test_client_instance_factory_defaults(): def test_client_instance_factory_non_defaults(): - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated import enums instance_type = enums.Instance.Type.DEVELOPMENT labels = {"foo": "bar"} @@ -665,7 +665,7 @@ def test_client_list_instances(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance FAILED_LOCATION = "FAILED" INSTANCE_ID1 = "instance-id1" @@ -717,7 +717,7 @@ def test_client_list_clusters(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from 
google.cloud.bigtable.instance import Cluster + from google.cloud.bigtable.deprecated.instance import Cluster instance_api = mock.create_autospec(BigtableInstanceAdminClient) diff --git a/tests/unit/test_cluster.py b/tests/unit/v2_client/test_cluster.py similarity index 94% rename from tests/unit/test_cluster.py rename to tests/unit/v2_client/test_cluster.py index cb0312b0c..e667c2af4 100644 --- a/tests/unit/test_cluster.py +++ b/tests/unit/v2_client/test_cluster.py @@ -42,13 +42,13 @@ def _make_cluster(*args, **kwargs): - from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable.deprecated.cluster import Cluster return Cluster(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) @@ -72,8 +72,8 @@ def test_cluster_constructor_defaults(): def test_cluster_constructor_explicit(): - from google.cloud.bigtable.enums import StorageType - from google.cloud.bigtable.enums import Cluster + from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.deprecated.enums import Cluster STATE = Cluster.State.READY STORAGE_TYPE_SSD = StorageType.SSD @@ -126,8 +126,8 @@ def test_cluster_kms_key_name_setter(): def test_cluster_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.cluster import Cluster - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.deprecated import enums client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -162,7 +162,7 @@ def test_cluster_from_pb_success(): def test_cluster_from_pb_w_bad_cluster_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable.deprecated.cluster import Cluster bad_cluster_name = "BAD_NAME" @@ -174,7 +174,7 @@ def test_cluster_from_pb_w_bad_cluster_name(): def test_cluster_from_pb_w_instance_id_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable.deprecated.cluster import Cluster ALT_INSTANCE_ID = "ALT_INSTANCE_ID" client = _Client(PROJECT) @@ -189,7 +189,7 @@ def test_cluster_from_pb_w_instance_id_mistmatch(): def test_cluster_from_pb_w_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable.deprecated.cluster import Cluster ALT_PROJECT = "ALT_PROJECT" client = _Client(project=ALT_PROJECT) @@ -204,8 +204,8 @@ def test_cluster_from_pb_w_project_mistmatch(): def test_cluster_from_pb_w_autoscaling(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.cluster import Cluster - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.deprecated import enums client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -292,8 +292,8 @@ def _make_instance_admin_client(): def test_cluster_reload(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.enums import StorageType - from google.cloud.bigtable.enums import Cluster + from google.cloud.bigtable.deprecated.enums import StorageType + from 
google.cloud.bigtable.deprecated.enums import Cluster credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -349,7 +349,7 @@ def test_cluster_reload(): def test_cluster_exists_hit(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -371,7 +371,7 @@ def test_cluster_exists_hit(): def test_cluster_exists_miss(): - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance from google.api_core import exceptions credentials = _make_credentials() @@ -390,7 +390,7 @@ def test_cluster_exists_miss(): def test_cluster_exists_w_error(): - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance from google.api_core import exceptions credentials = _make_credentials() @@ -416,9 +416,9 @@ def test_cluster_create(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -471,9 +471,9 @@ def test_cluster_create_w_cmek(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -531,9 +531,9 @@ def test_cluster_create_w_autoscaling(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -600,7 +600,7 @@ def test_cluster_update(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -667,7 +667,7 @@ def test_cluster_update_w_autoscaling(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -726,7 +726,7 @@ def test_cluster_update_w_partial_autoscaling_config(): from google.cloud.bigtable_admin_v2.types import ( 
bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -811,7 +811,7 @@ def test_cluster_update_w_both_manual_and_autoscaling(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -871,8 +871,8 @@ def test_cluster_disable_autoscaling(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -928,8 +928,8 @@ def test_cluster_disable_autoscaling(): def test_create_cluster_with_both_manual_and_autoscaling(): - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -956,8 +956,8 @@ def test_create_cluster_with_both_manual_and_autoscaling(): def test_create_cluster_with_partial_autoscaling_config(): - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -997,8 +997,8 @@ def test_create_cluster_with_partial_autoscaling_config(): def test_create_cluster_with_no_scaling_config(): - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) diff --git a/tests/unit/test_column_family.py b/tests/unit/v2_client/test_column_family.py similarity index 87% rename from tests/unit/test_column_family.py rename to tests/unit/v2_client/test_column_family.py index b464024a7..d16d2b20c 100644 --- a/tests/unit/test_column_family.py +++ b/tests/unit/v2_client/test_column_family.py @@ -19,7 +19,7 @@ def _make_max_versions_gc_rule(*args, **kwargs): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule return MaxVersionsGCRule(*args, **kwargs) @@ -51,7 +51,7 @@ def test_max_versions_gc_rule_to_pb(): def _make_max_age_gc_rule(*args, **kwargs): - from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule return MaxAgeGCRule(*args, **kwargs) @@ -89,7 +89,7 @@ def test_max_age_gc_rule_to_pb(): def _make_gc_rule_union(*args, **kwargs): - from google.cloud.bigtable.column_family import GCRuleUnion + from 
google.cloud.bigtable.deprecated.column_family import GCRuleUnion return GCRuleUnion(*args, **kwargs) @@ -124,8 +124,8 @@ def test_gc_rule_union___ne__same_value(): def test_gc_rule_union_to_pb(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule max_num_versions = 42 rule1 = MaxVersionsGCRule(max_num_versions) @@ -145,8 +145,8 @@ def test_gc_rule_union_to_pb(): def test_gc_rule_union_to_pb_nested(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule max_num_versions1 = 42 rule1 = MaxVersionsGCRule(max_num_versions1) @@ -171,7 +171,7 @@ def test_gc_rule_union_to_pb_nested(): def _make_gc_rule_intersection(*args, **kwargs): - from google.cloud.bigtable.column_family import GCRuleIntersection + from google.cloud.bigtable.deprecated.column_family import GCRuleIntersection return GCRuleIntersection(*args, **kwargs) @@ -206,8 +206,8 @@ def test_gc_rule_intersection___ne__same_value(): def test_gc_rule_intersection_to_pb(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule max_num_versions = 42 rule1 = MaxVersionsGCRule(max_num_versions) @@ -227,8 +227,8 @@ def test_gc_rule_intersection_to_pb(): def test_gc_rule_intersection_to_pb_nested(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule max_num_versions1 = 42 rule1 = MaxVersionsGCRule(max_num_versions1) @@ -253,13 +253,13 @@ def test_gc_rule_intersection_to_pb_nested(): def _make_column_family(*args, **kwargs): - from google.cloud.bigtable.column_family import ColumnFamily + from google.cloud.bigtable.deprecated.column_family import ColumnFamily return ColumnFamily(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) @@ -323,7 +323,7 @@ def test_column_family_to_pb_no_rules(): def test_column_family_to_pb_with_rule(): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1) column_family = _make_column_family("column_family_id", None, gc_rule=gc_rule) @@ -336,7 +336,7 @@ def _create_test_helper(gc_rule=None): from google.cloud.bigtable_admin_v2.types import ( bigtable_table_admin as table_admin_v2_pb2, ) - from tests.unit._testing import _FakeStub + from ._testing import _FakeStub from google.cloud.bigtable_admin_v2.services.bigtable_table_admin 
import ( BigtableTableAdminClient, ) @@ -397,14 +397,14 @@ def test_column_family_create(): def test_column_family_create_with_gc_rule(): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1337) _create_test_helper(gc_rule=gc_rule) def _update_test_helper(gc_rule=None): - from tests.unit._testing import _FakeStub + from ._testing import _FakeStub from google.cloud.bigtable_admin_v2.types import ( bigtable_table_admin as table_admin_v2_pb2, ) @@ -467,7 +467,7 @@ def test_column_family_update(): def test_column_family_update_with_gc_rule(): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1337) _update_test_helper(gc_rule=gc_rule) @@ -478,7 +478,7 @@ def test_column_family_delete(): from google.cloud.bigtable_admin_v2.types import ( bigtable_table_admin as table_admin_v2_pb2, ) - from tests.unit._testing import _FakeStub + from ._testing import _FakeStub from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) @@ -530,15 +530,15 @@ def test_column_family_delete(): def test__gc_rule_from_pb_empty(): - from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb gc_rule_pb = _GcRulePB() assert _gc_rule_from_pb(gc_rule_pb) is None def test__gc_rule_from_pb_max_num_versions(): - from google.cloud.bigtable.column_family import _gc_rule_from_pb - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule orig_rule = MaxVersionsGCRule(1) gc_rule_pb = orig_rule.to_pb() @@ -549,8 +549,8 @@ def test__gc_rule_from_pb_max_num_versions(): def test__gc_rule_from_pb_max_age(): import datetime - from google.cloud.bigtable.column_family import _gc_rule_from_pb - from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule orig_rule = MaxAgeGCRule(datetime.timedelta(seconds=1)) gc_rule_pb = orig_rule.to_pb() @@ -561,10 +561,10 @@ def test__gc_rule_from_pb_max_age(): def test__gc_rule_from_pb_union(): import datetime - from google.cloud.bigtable.column_family import _gc_rule_from_pb - from google.cloud.bigtable.column_family import GCRuleUnion - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import GCRuleUnion + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule rule1 = MaxVersionsGCRule(1) rule2 = MaxAgeGCRule(datetime.timedelta(seconds=1)) @@ -577,10 +577,10 @@ def test__gc_rule_from_pb_union(): def test__gc_rule_from_pb_intersection(): import datetime - from google.cloud.bigtable.column_family import _gc_rule_from_pb - from google.cloud.bigtable.column_family import GCRuleIntersection - from google.cloud.bigtable.column_family import MaxAgeGCRule - from google.cloud.bigtable.column_family import MaxVersionsGCRule + 
from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import GCRuleIntersection + from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule rule1 = MaxVersionsGCRule(1) rule2 = MaxAgeGCRule(datetime.timedelta(seconds=1)) @@ -592,7 +592,7 @@ def test__gc_rule_from_pb_intersection(): def test__gc_rule_from_pb_unknown_field_name(): - from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb class MockProto(object): diff --git a/tests/unit/test_encryption_info.py b/tests/unit/v2_client/test_encryption_info.py similarity index 94% rename from tests/unit/test_encryption_info.py rename to tests/unit/v2_client/test_encryption_info.py index 8b92a83ed..0b6a93e9e 100644 --- a/tests/unit/test_encryption_info.py +++ b/tests/unit/v2_client/test_encryption_info.py @@ -14,7 +14,7 @@ import mock -from google.cloud.bigtable import enums +from google.cloud.bigtable.deprecated import enums EncryptionType = enums.EncryptionInfo.EncryptionType @@ -30,7 +30,7 @@ def _make_status_pb(code=_STATUS_CODE, message=_STATUS_MESSAGE): def _make_status(code=_STATUS_CODE, message=_STATUS_MESSAGE): - from google.cloud.bigtable.error import Status + from google.cloud.bigtable.deprecated.error import Status status_pb = _make_status_pb(code=code, message=message) return Status(status_pb) @@ -54,7 +54,7 @@ def _make_info_pb( def _make_encryption_info(*args, **kwargs): - from google.cloud.bigtable.encryption_info import EncryptionInfo + from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo return EncryptionInfo(*args, **kwargs) @@ -70,7 +70,7 @@ def _make_encryption_info_defaults( def test_encryption_info__from_pb(): - from google.cloud.bigtable.encryption_info import EncryptionInfo + from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo info_pb = _make_info_pb() diff --git a/tests/unit/test_error.py b/tests/unit/v2_client/test_error.py similarity index 97% rename from tests/unit/test_error.py rename to tests/unit/v2_client/test_error.py index 8b148473c..072a3b3c3 100644 --- a/tests/unit/test_error.py +++ b/tests/unit/v2_client/test_error.py @@ -20,7 +20,7 @@ def _make_status_pb(**kwargs): def _make_status(status_pb): - from google.cloud.bigtable.error import Status + from google.cloud.bigtable.deprecated.error import Status return Status(status_pb) diff --git a/tests/unit/test_instance.py b/tests/unit/v2_client/test_instance.py similarity index 95% rename from tests/unit/test_instance.py rename to tests/unit/v2_client/test_instance.py index c577adca5..b43e8bb38 100644 --- a/tests/unit/test_instance.py +++ b/tests/unit/v2_client/test_instance.py @@ -17,7 +17,7 @@ import pytest from ._testing import _make_credentials -from google.cloud.bigtable.cluster import Cluster +from google.cloud.bigtable.deprecated.cluster import Cluster PROJECT = "project" INSTANCE_ID = "instance-id" @@ -47,7 +47,7 @@ def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) @@ -61,7 +61,7 @@ def _make_instance_admin_api(): def _make_instance(*args, **kwargs): - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance return Instance(*args, **kwargs) @@ -79,7 +79,7 @@ def 
test_instance_constructor_defaults(): def test_instance_constructor_non_default(): - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums instance_type = enums.Instance.Type.DEVELOPMENT state = enums.Instance.State.READY @@ -104,7 +104,7 @@ def test_instance_constructor_non_default(): def test_instance__update_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums instance_type = data_v2_pb2.Instance.Type.PRODUCTION state = enums.Instance.State.READY @@ -129,7 +129,7 @@ def test_instance__update_from_pb_success(): def test_instance__update_from_pb_success_defaults(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums instance_pb = data_v2_pb2.Instance(display_name=DISPLAY_NAME) @@ -156,8 +156,8 @@ def test_instance__update_from_pb_wo_display_name(): def test_instance_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable import enums - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable.deprecated.instance import Instance credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -184,7 +184,7 @@ def test_instance_from_pb_success(): def test_instance_from_pb_bad_instance_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance instance_name = "INCORRECT_FORMAT" instance_pb = data_v2_pb2.Instance(name=instance_name) @@ -195,7 +195,7 @@ def test_instance_from_pb_bad_instance_name(): def test_instance_from_pb_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance ALT_PROJECT = "ALT_PROJECT" credentials = _make_credentials() @@ -304,7 +304,7 @@ def _instance_api_response_for_create(): def test_instance_create(): - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums from google.cloud.bigtable_admin_v2.types import Instance from google.cloud.bigtable_admin_v2.types import Cluster import warnings @@ -353,8 +353,8 @@ def test_instance_create(): def test_instance_create_w_clusters(): - from google.cloud.bigtable import enums - from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable.deprecated.cluster import Cluster from google.cloud.bigtable_admin_v2.types import Cluster as cluster_pb from google.cloud.bigtable_admin_v2.types import Instance as instance_pb @@ -473,7 +473,7 @@ def test_instance_exists_w_error(): def test_instance_reload(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums DISPLAY_NAME = "hey-hi-hello" credentials = _make_credentials() @@ -527,7 +527,7 @@ def _instance_api_response_for_update(): def test_instance_update(): - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums from google.protobuf import field_mask_pb2 from 
google.cloud.bigtable_admin_v2.types import Instance @@ -603,7 +603,7 @@ def test_instance_delete(): def test_instance_get_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -631,7 +631,7 @@ def test_instance_get_iam_policy(): def test_instance_get_iam_policy_w_requested_policy_version(): from google.iam.v1 import policy_pb2, options_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -665,8 +665,8 @@ def test_instance_get_iam_policy_w_requested_policy_version(): def test_instance_set_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -721,7 +721,7 @@ def test_instance_test_iam_permissions(): def test_instance_cluster_factory(): - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums CLUSTER_ID = "{}-cluster".format(INSTANCE_ID) LOCATION_ID = "us-central1-c" @@ -749,8 +749,8 @@ def test_instance_list_clusters(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.instance import Instance - from google.cloud.bigtable.instance import Cluster + from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.deprecated.instance import Cluster credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -788,7 +788,7 @@ def test_instance_list_clusters(): def test_instance_table_factory(): - from google.cloud.bigtable.table import Table + from google.cloud.bigtable.deprecated.table import Table app_profile_id = "appProfileId1262094415" instance = _make_instance(INSTANCE_ID, None) @@ -857,7 +857,7 @@ def test_instance_list_tables_failure_name_bad_before(): def test_instance_app_profile_factory(): - from google.cloud.bigtable.enums import RoutingPolicyType + from google.cloud.bigtable.deprecated.enums import RoutingPolicyType instance = _make_instance(INSTANCE_ID, None) @@ -890,7 +890,7 @@ def test_instance_list_app_profiles(): from google.api_core.page_iterator import Iterator from google.api_core.page_iterator import Page from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.deprecated.app_profile import AppProfile class _Iterator(Iterator): def __init__(self, pages): diff --git a/tests/unit/test_policy.py b/tests/unit/v2_client/test_policy.py similarity index 89% rename from tests/unit/test_policy.py rename to tests/unit/v2_client/test_policy.py index 77674517e..ef3df2d2b 100644 --- a/tests/unit/test_policy.py +++ b/tests/unit/v2_client/test_policy.py @@ -14,7 +14,7 @@ def _make_policy(*args, **kw): - from google.cloud.bigtable.policy import Policy + from 
google.cloud.bigtable.deprecated.policy import Policy return Policy(*args, **kw) @@ -48,7 +48,7 @@ def test_policy_ctor_explicit(): def test_policy_bigtable_admins(): - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -58,7 +58,7 @@ def test_policy_bigtable_admins(): def test_policy_bigtable_readers(): - from google.cloud.bigtable.policy import BIGTABLE_READER_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_READER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -68,7 +68,7 @@ def test_policy_bigtable_readers(): def test_policy_bigtable_users(): - from google.cloud.bigtable.policy import BIGTABLE_USER_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_USER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -78,7 +78,7 @@ def test_policy_bigtable_users(): def test_policy_bigtable_viewers(): - from google.cloud.bigtable.policy import BIGTABLE_VIEWER_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_VIEWER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -89,7 +89,7 @@ def test_policy_bigtable_viewers(): def test_policy_from_pb_w_empty(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import Policy empty = frozenset() message = policy_pb2.Policy() @@ -106,8 +106,8 @@ def test_policy_from_pb_w_empty(): def test_policy_from_pb_w_non_empty(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy ETAG = b"ETAG" VERSION = 1 @@ -133,8 +133,8 @@ def test_policy_from_pb_w_condition(): import pytest from google.iam.v1 import policy_pb2 from google.api_core.iam import InvalidOperationException, _DICT_ACCESS_MSG - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy ETAG = b"ETAG" VERSION = 3 @@ -184,7 +184,7 @@ def test_policy_to_pb_empty(): def test_policy_to_pb_explicit(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE VERSION = 1 ETAG = b"ETAG" @@ -204,7 +204,7 @@ def test_policy_to_pb_explicit(): def test_policy_to_pb_w_condition(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE VERSION = 3 ETAG = b"ETAG" @@ -234,7 +234,7 @@ def test_policy_to_pb_w_condition(): def test_policy_from_api_repr_wo_etag(): - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import Policy VERSION = 1 empty = frozenset() @@ -252,7 +252,7 @@ def test_policy_from_api_repr_wo_etag(): def test_policy_from_api_repr_w_etag(): import base64 - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import Policy ETAG = b"ETAG" empty = frozenset() diff --git a/tests/unit/test_row.py b/tests/unit/v2_client/test_row.py 
similarity index 95% rename from tests/unit/test_row.py rename to tests/unit/v2_client/test_row.py index 49bbfc45c..4850b18c3 100644 --- a/tests/unit/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -20,13 +20,13 @@ def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) def _make_row(*args, **kwargs): - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.deprecated.row import Row return Row(*args, **kwargs) @@ -42,7 +42,7 @@ def test_row_table_getter(): def _make__set_delete_row(*args, **kwargs): - from google.cloud.bigtable.row import _SetDeleteRow + from google.cloud.bigtable.deprecated.row import _SetDeleteRow return _SetDeleteRow(*args, **kwargs) @@ -54,7 +54,7 @@ def test__set_detlete_row__get_mutations_virtual(): def _make_direct_row(*args, **kwargs): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow return DirectRow(*args, **kwargs) @@ -193,7 +193,7 @@ def test_direct_row_delete(): def test_direct_row_delete_cell(): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow class MockRow(DirectRow): def __init__(self, *args, **kwargs): @@ -237,7 +237,7 @@ def test_direct_row_delete_cells_non_iterable(): def test_direct_row_delete_cells_all_columns(): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow row_key = b"row_key" column_family_id = "column_family_id" @@ -293,7 +293,7 @@ def test_direct_row_delete_cells_no_time_range(): def test_direct_row_delete_cells_with_time_range(): import datetime from google.cloud._helpers import _EPOCH - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange microseconds = 30871000 # Makes sure already milliseconds granularity start = _EPOCH + datetime.timedelta(microseconds=microseconds) @@ -386,7 +386,7 @@ def test_direct_row_commit_with_exception(): def _make_conditional_row(*args, **kwargs): - from google.cloud.bigtable.row import ConditionalRow + from google.cloud.bigtable.deprecated.row import ConditionalRow return ConditionalRow(*args, **kwargs) @@ -417,7 +417,7 @@ def test_conditional_row__get_mutations(): def test_conditional_row_commit(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter from google.cloud.bigtable_v2.services.bigtable import BigtableClient project_id = "project-id" @@ -466,7 +466,7 @@ def test_conditional_row_commit(): def test_conditional_row_commit_too_many_mutations(): from google.cloud._testing import _Monkey - from google.cloud.bigtable import row as MUT + from google.cloud.bigtable.deprecated import row as MUT row_key = b"row_key" table = object() @@ -480,7 +480,7 @@ def test_conditional_row_commit_too_many_mutations(): def test_conditional_row_commit_no_mutations(): - from tests.unit._testing import _FakeStub + from ._testing import _FakeStub project_id = "project-id" row_key = b"row_key" @@ -504,7 +504,7 @@ def test_conditional_row_commit_no_mutations(): def _make_append_row(*args, **kwargs): - from google.cloud.bigtable.row import AppendRow + from google.cloud.bigtable.deprecated.row import AppendRow return AppendRow(*args, **kwargs) @@ -564,7 +564,7 @@ def test_append_row_increment_cell_value(): def test_append_row_commit(): from 
google.cloud._testing import _Monkey - from google.cloud.bigtable import row as MUT + from google.cloud.bigtable.deprecated import row as MUT from google.cloud.bigtable_v2.services.bigtable import BigtableClient project_id = "project-id" @@ -607,7 +607,7 @@ def mock_parse_rmw_row_response(row_response): def test_append_row_commit_no_rules(): - from tests.unit._testing import _FakeStub + from ._testing import _FakeStub project_id = "project-id" row_key = b"row_key" @@ -630,7 +630,7 @@ def test_append_row_commit_no_rules(): def test_append_row_commit_too_many_mutations(): from google.cloud._testing import _Monkey - from google.cloud.bigtable import row as MUT + from google.cloud.bigtable.deprecated import row as MUT row_key = b"row_key" table = object() @@ -644,7 +644,7 @@ def test_append_row_commit_too_many_mutations(): def test__parse_rmw_row_response(): from google.cloud._helpers import _datetime_from_microseconds - from google.cloud.bigtable.row import _parse_rmw_row_response + from google.cloud.bigtable.deprecated.row import _parse_rmw_row_response col_fam1 = "col-fam-id" col_fam2 = "col-fam-id2" @@ -700,7 +700,7 @@ def test__parse_rmw_row_response(): def test__parse_family_pb(): from google.cloud._helpers import _datetime_from_microseconds - from google.cloud.bigtable.row import _parse_family_pb + from google.cloud.bigtable.deprecated.row import _parse_family_pb col_fam1 = "col-fam-id" col_name1 = b"col-name1" diff --git a/tests/unit/test_row_data.py b/tests/unit/v2_client/test_row_data.py similarity index 94% rename from tests/unit/test_row_data.py rename to tests/unit/v2_client/test_row_data.py index 382a81ef1..ee9b065c8 100644 --- a/tests/unit/test_row_data.py +++ b/tests/unit/v2_client/test_row_data.py @@ -27,7 +27,7 @@ def _make_cell(*args, **kwargs): - from google.cloud.bigtable.row_data import Cell + from google.cloud.bigtable.deprecated.row_data import Cell return Cell(*args, **kwargs) @@ -36,7 +36,7 @@ def _cell_from_pb_test_helper(labels=None): import datetime from google.cloud._helpers import _EPOCH from google.cloud.bigtable_v2.types import data as data_v2_pb2 - from google.cloud.bigtable.row_data import Cell + from google.cloud.bigtable.deprecated.row_data import Cell timestamp = _EPOCH + datetime.timedelta(microseconds=TIMESTAMP_MICROS) value = b"value-bytes" @@ -100,7 +100,7 @@ def test_cell___ne__(): def _make_partial_row_data(*args, **kwargs): - from google.cloud.bigtable.row_data import PartialRowData + from google.cloud.bigtable.deprecated.row_data import PartialRowData return PartialRowData(*args, **kwargs) @@ -288,7 +288,7 @@ def trailing_metadata(self): def test__retry_read_rows_exception_miss(): from google.api_core.exceptions import Conflict - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception exception = Conflict("testing") assert not _retry_read_rows_exception(exception) @@ -296,7 +296,7 @@ def test__retry_read_rows_exception_miss(): def test__retry_read_rows_exception_service_unavailable(): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception exception = ServiceUnavailable("testing") assert _retry_read_rows_exception(exception) @@ -304,7 +304,7 @@ def test__retry_read_rows_exception_service_unavailable(): def test__retry_read_rows_exception_deadline_exceeded(): from google.api_core.exceptions import 
DeadlineExceeded - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception exception = DeadlineExceeded("testing") assert _retry_read_rows_exception(exception) @@ -312,7 +312,7 @@ def test__retry_read_rows_exception_deadline_exceeded(): def test__retry_read_rows_exception_internal_server_not_retriable(): from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.row_data import ( + from google.cloud.bigtable.deprecated.row_data import ( _retry_read_rows_exception, RETRYABLE_INTERNAL_ERROR_MESSAGES, ) @@ -325,7 +325,7 @@ def test__retry_read_rows_exception_internal_server_not_retriable(): def test__retry_read_rows_exception_internal_server_retriable(): from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.row_data import ( + from google.cloud.bigtable.deprecated.row_data import ( _retry_read_rows_exception, RETRYABLE_INTERNAL_ERROR_MESSAGES, ) @@ -337,7 +337,7 @@ def test__retry_read_rows_exception_internal_server_retriable(): def test__retry_read_rows_exception_miss_wrapped_in_grpc(): from google.api_core.exceptions import Conflict - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception wrapped = Conflict("testing") exception = _make_grpc_call_error(wrapped) @@ -346,7 +346,7 @@ def test__retry_read_rows_exception_miss_wrapped_in_grpc(): def test__retry_read_rows_exception_service_unavailable_wrapped_in_grpc(): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception wrapped = ServiceUnavailable("testing") exception = _make_grpc_call_error(wrapped) @@ -355,7 +355,7 @@ def test__retry_read_rows_exception_service_unavailable_wrapped_in_grpc(): def test__retry_read_rows_exception_deadline_exceeded_wrapped_in_grpc(): from google.api_core.exceptions import DeadlineExceeded - from google.cloud.bigtable.row_data import _retry_read_rows_exception + from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception wrapped = DeadlineExceeded("testing") exception = _make_grpc_call_error(wrapped) @@ -363,7 +363,7 @@ def test__retry_read_rows_exception_deadline_exceeded_wrapped_in_grpc(): def _make_partial_rows_data(*args, **kwargs): - from google.cloud.bigtable.row_data import PartialRowsData + from google.cloud.bigtable.deprecated.row_data import PartialRowsData return PartialRowsData(*args, **kwargs) @@ -373,13 +373,13 @@ def _partial_rows_data_consume_all(yrd): def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) def test_partial_rows_data_constructor(): - from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS client = _Client() client._data_stub = mock.MagicMock() @@ -436,7 +436,7 @@ def fake_read(*args, **kwargs): def test_partial_rows_data_constructor_with_retry(): - from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS client = _Client() client._data_stub = mock.MagicMock() @@ -644,7 +644,7 @@ def test_partial_rows_data_valid_last_scanned_row_key_on_start(): def 
test_partial_rows_data_invalid_empty_chunk(): - from google.cloud.bigtable.row_data import InvalidChunk + from google.cloud.bigtable.deprecated.row_data import InvalidChunk from google.cloud.bigtable_v2.services.bigtable import BigtableClient client = _Client() @@ -755,14 +755,14 @@ def test_partial_rows_data_yield_retry_rows_data(): def _make_read_rows_request_manager(*args, **kwargs): - from google.cloud.bigtable.row_data import _ReadRowsRequestManager + from google.cloud.bigtable.deprecated.row_data import _ReadRowsRequestManager return _ReadRowsRequestManager(*args, **kwargs) @pytest.fixture(scope="session") def rrrm_data(): - from google.cloud.bigtable import row_set + from google.cloud.bigtable.deprecated import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") row_range2 = row_set.RowRange(b"row_key31", b"row_key39") @@ -851,7 +851,7 @@ def test_RRRM__filter_row_ranges_all_ranges_already_read(rrrm_data): def test_RRRM__filter_row_ranges_all_ranges_already_read_open_closed(): - from google.cloud.bigtable import row_set + from google.cloud.bigtable.deprecated import row_set last_scanned_key = b"row_key54" @@ -895,7 +895,7 @@ def test_RRRM__filter_row_ranges_some_ranges_already_read(rrrm_data): def test_RRRM_build_updated_request(rrrm_data): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_range1 = rrrm_data["row_range1"] @@ -944,7 +944,7 @@ def test_RRRM_build_updated_request_full_table(): def test_RRRM_build_updated_request_no_start_key(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_filter = RowSampleFilter(0.33) @@ -972,7 +972,7 @@ def test_RRRM_build_updated_request_no_start_key(): def test_RRRM_build_updated_request_no_end_key(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_filter = RowSampleFilter(0.33) @@ -998,7 +998,7 @@ def test_RRRM_build_updated_request_no_end_key(): def test_RRRM_build_updated_request_rows(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key4" @@ -1046,7 +1046,7 @@ def test_RRRM__key_already_read(): def test_RRRM__rows_limit_reached(): - from google.cloud.bigtable.row_data import InvalidRetryRequest + from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest last_scanned_key = b"row_key14" request = _ReadRowsRequestPB(table_name=TABLE_NAME) @@ -1059,7 +1059,7 @@ def test_RRRM__rows_limit_reached(): def test_RRRM_build_updated_request_last_row_read_raises_invalid_retry_request(): - from google.cloud.bigtable.row_data import InvalidRetryRequest + from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest last_scanned_key = b"row_key4" request = _ReadRowsRequestPB(table_name=TABLE_NAME) @@ -1073,8 +1073,8 @@ def test_RRRM_build_updated_request_last_row_read_raises_invalid_retry_request() def test_RRRM_build_updated_request_row_ranges_read_raises_invalid_retry_request(): - from google.cloud.bigtable.row_data import InvalidRetryRequest - from google.cloud.bigtable import row_set + from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest 
+ from google.cloud.bigtable.deprecated import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") @@ -1095,7 +1095,7 @@ def test_RRRM_build_updated_request_row_ranges_read_raises_invalid_retry_request def test_RRRM_build_updated_request_row_ranges_valid(): - from google.cloud.bigtable import row_set + from google.cloud.bigtable.deprecated import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") @@ -1179,7 +1179,7 @@ def _ReadRowsResponseCellChunkPB(*args, **kw): def _make_cell_pb(value): - from google.cloud.bigtable import row_data + from google.cloud.bigtable.deprecated import row_data return row_data.Cell(value, TIMESTAMP_MICROS) diff --git a/tests/unit/test_row_filters.py b/tests/unit/v2_client/test_row_filters.py similarity index 77% rename from tests/unit/test_row_filters.py rename to tests/unit/v2_client/test_row_filters.py index b312cb942..dfb16ba16 100644 --- a/tests/unit/test_row_filters.py +++ b/tests/unit/v2_client/test_row_filters.py @@ -17,7 +17,7 @@ def test_bool_filter_constructor(): - from google.cloud.bigtable.row_filters import _BoolFilter + from google.cloud.bigtable.deprecated.row_filters import _BoolFilter flag = object() row_filter = _BoolFilter(flag) @@ -25,7 +25,7 @@ def test_bool_filter_constructor(): def test_bool_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import _BoolFilter + from google.cloud.bigtable.deprecated.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -34,7 +34,7 @@ def test_bool_filter___eq__type_differ(): def test_bool_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import _BoolFilter + from google.cloud.bigtable.deprecated.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -43,7 +43,7 @@ def test_bool_filter___eq__same_value(): def test_bool_filter___ne__same_value(): - from google.cloud.bigtable.row_filters import _BoolFilter + from google.cloud.bigtable.deprecated.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -52,7 +52,7 @@ def test_bool_filter___ne__same_value(): def test_sink_filter_to_pb(): - from google.cloud.bigtable.row_filters import SinkFilter + from google.cloud.bigtable.deprecated.row_filters import SinkFilter flag = True row_filter = SinkFilter(flag) @@ -62,7 +62,7 @@ def test_sink_filter_to_pb(): def test_pass_all_filter_to_pb(): - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.deprecated.row_filters import PassAllFilter flag = True row_filter = PassAllFilter(flag) @@ -72,7 +72,7 @@ def test_pass_all_filter_to_pb(): def test_block_all_filter_to_pb(): - from google.cloud.bigtable.row_filters import BlockAllFilter + from google.cloud.bigtable.deprecated.row_filters import BlockAllFilter flag = True row_filter = BlockAllFilter(flag) @@ -82,7 +82,7 @@ def test_block_all_filter_to_pb(): def test_regex_filterconstructor(): - from google.cloud.bigtable.row_filters import _RegexFilter + from google.cloud.bigtable.deprecated.row_filters import _RegexFilter regex = b"abc" row_filter = _RegexFilter(regex) @@ -90,7 +90,7 @@ def test_regex_filterconstructor(): def test_regex_filterconstructor_non_bytes(): - from google.cloud.bigtable.row_filters import _RegexFilter + from google.cloud.bigtable.deprecated.row_filters import _RegexFilter regex = "abc" row_filter = _RegexFilter(regex) @@ -98,7 +98,7 @@ def test_regex_filterconstructor_non_bytes(): def test_regex_filter__eq__type_differ(): - from google.cloud.bigtable.row_filters import 
_RegexFilter + from google.cloud.bigtable.deprecated.row_filters import _RegexFilter regex = b"def-rgx" row_filter1 = _RegexFilter(regex) @@ -107,7 +107,7 @@ def test_regex_filter__eq__type_differ(): def test_regex_filter__eq__same_value(): - from google.cloud.bigtable.row_filters import _RegexFilter + from google.cloud.bigtable.deprecated.row_filters import _RegexFilter regex = b"trex-regex" row_filter1 = _RegexFilter(regex) @@ -116,7 +116,7 @@ def test_regex_filter__eq__same_value(): def test_regex_filter__ne__same_value(): - from google.cloud.bigtable.row_filters import _RegexFilter + from google.cloud.bigtable.deprecated.row_filters import _RegexFilter regex = b"abc" row_filter1 = _RegexFilter(regex) @@ -125,7 +125,7 @@ def test_regex_filter__ne__same_value(): def test_row_key_regex_filter_to_pb(): - from google.cloud.bigtable.row_filters import RowKeyRegexFilter + from google.cloud.bigtable.deprecated.row_filters import RowKeyRegexFilter regex = b"row-key-regex" row_filter = RowKeyRegexFilter(regex) @@ -135,7 +135,7 @@ def test_row_key_regex_filter_to_pb(): def test_row_sample_filter_constructor(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter sample = object() row_filter = RowSampleFilter(sample) @@ -143,7 +143,7 @@ def test_row_sample_filter_constructor(): def test_row_sample_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -152,7 +152,7 @@ def test_row_sample_filter___eq__type_differ(): def test_row_sample_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -161,7 +161,7 @@ def test_row_sample_filter___eq__same_value(): def test_row_sample_filter___ne__(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter sample = object() other_sample = object() @@ -171,7 +171,7 @@ def test_row_sample_filter___ne__(): def test_row_sample_filter_to_pb(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter sample = 0.25 row_filter = RowSampleFilter(sample) @@ -181,7 +181,7 @@ def test_row_sample_filter_to_pb(): def test_family_name_regex_filter_to_pb(): - from google.cloud.bigtable.row_filters import FamilyNameRegexFilter + from google.cloud.bigtable.deprecated.row_filters import FamilyNameRegexFilter regex = "family-regex" row_filter = FamilyNameRegexFilter(regex) @@ -191,7 +191,7 @@ def test_family_name_regex_filter_to_pb(): def test_column_qualifier_regext_filter_to_pb(): - from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnQualifierRegexFilter regex = b"column-regex" row_filter = ColumnQualifierRegexFilter(regex) @@ -201,7 +201,7 @@ def test_column_qualifier_regext_filter_to_pb(): def test_timestamp_range_constructor(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange start = object() end = object() @@ -211,7 +211,7 @@ def test_timestamp_range_constructor(): def test_timestamp_range___eq__(): - from 
google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange start = object() end = object() @@ -221,7 +221,7 @@ def test_timestamp_range___eq__(): def test_timestamp_range___eq__type_differ(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange start = object() end = object() @@ -231,7 +231,7 @@ def test_timestamp_range___eq__type_differ(): def test_timestamp_range___ne__same_value(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange start = object() end = object() @@ -243,7 +243,7 @@ def test_timestamp_range___ne__same_value(): def _timestamp_range_to_pb_helper(pb_kwargs, start=None, end=None): import datetime from google.cloud._helpers import _EPOCH - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRange if start is not None: start = _EPOCH + datetime.timedelta(microseconds=start) @@ -291,7 +291,7 @@ def test_timestamp_range_to_pb_end_only(): def test_timestamp_range_filter_constructor(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter range_ = object() row_filter = TimestampRangeFilter(range_) @@ -299,7 +299,7 @@ def test_timestamp_range_filter_constructor(): def test_timestamp_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -308,7 +308,7 @@ def test_timestamp_range_filter___eq__type_differ(): def test_timestamp_range_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -317,7 +317,7 @@ def test_timestamp_range_filter___eq__same_value(): def test_timestamp_range_filter___ne__(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter range_ = object() other_range_ = object() @@ -327,8 +327,8 @@ def test_timestamp_range_filter___ne__(): def test_timestamp_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter + from google.cloud.bigtable.deprecated.row_filters import TimestampRange range_ = TimestampRange() row_filter = TimestampRangeFilter(range_) @@ -338,7 +338,7 @@ def test_timestamp_range_filter_to_pb(): def test_column_range_filter_constructor_defaults(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() row_filter = ColumnRangeFilter(column_family_id) @@ -350,7 +350,7 @@ def test_column_range_filter_constructor_defaults(): def test_column_range_filter_constructor_explicit(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() start_column = object() @@ -372,7 +372,7 @@ def 
test_column_range_filter_constructor_explicit(): def test_column_range_filter_constructor_bad_start(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() with pytest.raises(ValueError): @@ -380,7 +380,7 @@ def test_column_range_filter_constructor_bad_start(): def test_column_range_filter_constructor_bad_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() with pytest.raises(ValueError): @@ -388,7 +388,7 @@ def test_column_range_filter_constructor_bad_end(): def test_column_range_filter___eq__(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() start_column = object() @@ -413,7 +413,7 @@ def test_column_range_filter___eq__(): def test_column_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() row_filter1 = ColumnRangeFilter(column_family_id) @@ -422,7 +422,7 @@ def test_column_range_filter___eq__type_differ(): def test_column_range_filter___ne__(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = object() other_column_family_id = object() @@ -448,7 +448,7 @@ def test_column_range_filter___ne__(): def test_column_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = "column-family-id" row_filter = ColumnRangeFilter(column_family_id) @@ -458,7 +458,7 @@ def test_column_range_filter_to_pb(): def test_column_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -471,7 +471,7 @@ def test_column_range_filter_to_pb_inclusive_start(): def test_column_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -486,7 +486,7 @@ def test_column_range_filter_to_pb_exclusive_start(): def test_column_range_filter_to_pb_inclusive_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -499,7 +499,7 @@ def test_column_range_filter_to_pb_inclusive_end(): def test_column_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -514,7 +514,7 @@ def test_column_range_filter_to_pb_exclusive_end(): def test_value_regex_filter_to_pb_w_bytes(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRegexFilter value = regex = b"value-regex" row_filter = ValueRegexFilter(value) @@ -524,7 +524,7 @@ def 
test_value_regex_filter_to_pb_w_bytes(): def test_value_regex_filter_to_pb_w_str(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRegexFilter value = "value-regex" regex = value.encode("ascii") @@ -535,7 +535,7 @@ def test_value_regex_filter_to_pb_w_str(): def test_exact_value_filter_to_pb_w_bytes(): - from google.cloud.bigtable.row_filters import ExactValueFilter + from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter value = regex = b"value-regex" row_filter = ExactValueFilter(value) @@ -545,7 +545,7 @@ def test_exact_value_filter_to_pb_w_bytes(): def test_exact_value_filter_to_pb_w_str(): - from google.cloud.bigtable.row_filters import ExactValueFilter + from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter value = "value-regex" regex = value.encode("ascii") @@ -557,7 +557,7 @@ def test_exact_value_filter_to_pb_w_str(): def test_exact_value_filter_to_pb_w_int(): import struct - from google.cloud.bigtable.row_filters import ExactValueFilter + from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter value = 1 regex = struct.Struct(">q").pack(value) @@ -568,7 +568,7 @@ def test_exact_value_filter_to_pb_w_int(): def test_value_range_filter_constructor_defaults(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() @@ -579,7 +579,7 @@ def test_value_range_filter_constructor_defaults(): def test_value_range_filter_constructor_explicit(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter start_value = object() end_value = object() @@ -600,7 +600,7 @@ def test_value_range_filter_constructor_explicit(): def test_value_range_filter_constructor_w_int_values(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter import struct start_value = 1 @@ -618,21 +618,21 @@ def test_value_range_filter_constructor_w_int_values(): def test_value_range_filter_constructor_bad_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_start=True) def test_value_range_filter_constructor_bad_end(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_end=True) def test_value_range_filter___eq__(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter start_value = object() end_value = object() @@ -654,7 +654,7 @@ def test_value_range_filter___eq__(): def test_value_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter row_filter1 = ValueRangeFilter() row_filter2 = object() @@ -662,7 +662,7 @@ def test_value_range_filter___eq__type_differ(): def test_value_range_filter___ne__(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter start_value = object() other_start_value = object() @@ -685,7 +685,7 @@ 
def test_value_range_filter___ne__(): def test_value_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() expected_pb = _RowFilterPB(value_range_filter=_ValueRangePB()) @@ -693,7 +693,7 @@ def test_value_range_filter_to_pb(): def test_value_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value) @@ -703,7 +703,7 @@ def test_value_range_filter_to_pb_inclusive_start(): def test_value_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value, inclusive_start=False) @@ -713,7 +713,7 @@ def test_value_range_filter_to_pb_exclusive_start(): def test_value_range_filter_to_pb_inclusive_end(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value) @@ -723,7 +723,7 @@ def test_value_range_filter_to_pb_inclusive_end(): def test_value_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value, inclusive_end=False) @@ -733,7 +733,7 @@ def test_value_range_filter_to_pb_exclusive_end(): def test_cell_count_constructor(): - from google.cloud.bigtable.row_filters import _CellCountFilter + from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter num_cells = object() row_filter = _CellCountFilter(num_cells) @@ -741,7 +741,7 @@ def test_cell_count_constructor(): def test_cell_count___eq__type_differ(): - from google.cloud.bigtable.row_filters import _CellCountFilter + from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -750,7 +750,7 @@ def test_cell_count___eq__type_differ(): def test_cell_count___eq__same_value(): - from google.cloud.bigtable.row_filters import _CellCountFilter + from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -759,7 +759,7 @@ def test_cell_count___eq__same_value(): def test_cell_count___ne__same_value(): - from google.cloud.bigtable.row_filters import _CellCountFilter + from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -768,7 +768,7 @@ def test_cell_count___ne__same_value(): def test_cells_row_offset_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.deprecated.row_filters import CellsRowOffsetFilter num_cells = 76 row_filter = CellsRowOffsetFilter(num_cells) @@ -778,7 +778,7 @@ def test_cells_row_offset_filter_to_pb(): def test_cells_row_limit_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter num_cells = 189 row_filter = CellsRowLimitFilter(num_cells) @@ -788,7 +788,7 @@ def 
test_cells_row_limit_filter_to_pb(): def test_cells_column_limit_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsColumnLimitFilter + from google.cloud.bigtable.deprecated.row_filters import CellsColumnLimitFilter num_cells = 10 row_filter = CellsColumnLimitFilter(num_cells) @@ -798,7 +798,7 @@ def test_cells_column_limit_filter_to_pb(): def test_strip_value_transformer_filter_to_pb(): - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter flag = True row_filter = StripValueTransformerFilter(flag) @@ -808,7 +808,7 @@ def test_strip_value_transformer_filter_to_pb(): def test_apply_label_filter_constructor(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter label = object() row_filter = ApplyLabelFilter(label) @@ -816,7 +816,7 @@ def test_apply_label_filter_constructor(): def test_apply_label_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -825,7 +825,7 @@ def test_apply_label_filter___eq__type_differ(): def test_apply_label_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -834,7 +834,7 @@ def test_apply_label_filter___eq__same_value(): def test_apply_label_filter___ne__(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter label = object() other_label = object() @@ -844,7 +844,7 @@ def test_apply_label_filter___ne__(): def test_apply_label_filter_to_pb(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter label = "label" row_filter = ApplyLabelFilter(label) @@ -854,14 +854,14 @@ def test_apply_label_filter_to_pb(): def test_filter_combination_constructor_defaults(): - from google.cloud.bigtable.row_filters import _FilterCombination + from google.cloud.bigtable.deprecated.row_filters import _FilterCombination row_filter = _FilterCombination() assert row_filter.filters == [] def test_filter_combination_constructor_explicit(): - from google.cloud.bigtable.row_filters import _FilterCombination + from google.cloud.bigtable.deprecated.row_filters import _FilterCombination filters = object() row_filter = _FilterCombination(filters=filters) @@ -869,7 +869,7 @@ def test_filter_combination_constructor_explicit(): def test_filter_combination___eq__(): - from google.cloud.bigtable.row_filters import _FilterCombination + from google.cloud.bigtable.deprecated.row_filters import _FilterCombination filters = object() row_filter1 = _FilterCombination(filters=filters) @@ -878,7 +878,7 @@ def test_filter_combination___eq__(): def test_filter_combination___eq__type_differ(): - from google.cloud.bigtable.row_filters import _FilterCombination + from google.cloud.bigtable.deprecated.row_filters import _FilterCombination filters = object() row_filter1 = _FilterCombination(filters=filters) @@ -887,7 +887,7 @@ def test_filter_combination___eq__type_differ(): def test_filter_combination___ne__(): - from google.cloud.bigtable.row_filters import _FilterCombination + from 
google.cloud.bigtable.deprecated.row_filters import _FilterCombination filters = object() other_filters = object() @@ -897,9 +897,9 @@ def test_filter_combination___ne__(): def test_row_filter_chain_to_pb(): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import RowFilterChain + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -917,10 +917,10 @@ def test_row_filter_chain_to_pb(): def test_row_filter_chain_to_pb_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.deprecated.row_filters import RowFilterChain + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -941,9 +941,9 @@ def test_row_filter_chain_to_pb_nested(): def test_row_filter_union_to_pb(): - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -961,10 +961,10 @@ def test_row_filter_union_to_pb(): def test_row_filter_union_to_pb_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -985,7 +985,7 @@ def test_row_filter_union_to_pb_nested(): def test_conditional_row_filter_constructor(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -999,7 +999,7 @@ def test_conditional_row_filter_constructor(): def test_conditional_row_filter___eq__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -1014,7 +1014,7 @@ def 
test_conditional_row_filter___eq__(): def test_conditional_row_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -1027,7 +1027,7 @@ def test_conditional_row_filter___eq__type_differ(): def test_conditional_row_filter___ne__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter base_filter = object() other_base_filter = object() @@ -1043,10 +1043,10 @@ def test_conditional_row_filter___ne__(): def test_conditional_row_filter_to_pb(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -1073,9 +1073,9 @@ def test_conditional_row_filter_to_pb(): def test_conditional_row_filter_to_pb_true_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -1095,9 +1095,9 @@ def test_conditional_row_filter_to_pb_true_only(): def test_conditional_row_filter_to_pb_false_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() diff --git a/tests/unit/test_row_merger.py b/tests/unit/v2_client/test_row_merger.py similarity index 97% rename from tests/unit/test_row_merger.py rename to tests/unit/v2_client/test_row_merger.py index 483c04536..8693634f8 100644 --- a/tests/unit/test_row_merger.py +++ b/tests/unit/v2_client/test_row_merger.py @@ -5,9 +5,9 @@ import proto import pytest -from google.cloud.bigtable.row_data import PartialRowsData, PartialRowData, InvalidChunk +from google.cloud.bigtable.deprecated.row_data import PartialRowsData, PartialRowData, InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.row_merger import _RowMerger +from google.cloud.bigtable.deprecated.row_merger import _RowMerger # TODO: autogenerate protos from diff --git a/tests/unit/test_row_set.py 
b/tests/unit/v2_client/test_row_set.py similarity index 79% rename from tests/unit/test_row_set.py rename to tests/unit/v2_client/test_row_set.py index 1a33be720..ce0e9bfea 100644 --- a/tests/unit/test_row_set.py +++ b/tests/unit/v2_client/test_row_set.py @@ -14,7 +14,7 @@ def test_row_set_constructor(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() assert [] == row_set.row_keys @@ -22,8 +22,8 @@ def test_row_set_constructor(): def test_row_set__eq__(): - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -42,7 +42,7 @@ def test_row_set__eq__(): def test_row_set__eq__type_differ(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set1 = RowSet() row_set2 = object() @@ -50,7 +50,7 @@ def test_row_set__eq__type_differ(): def test_row_set__eq__len_row_keys_differ(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -66,8 +66,8 @@ def test_row_set__eq__len_row_keys_differ(): def test_row_set__eq__len_row_ranges_differ(): - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_range1 = RowRange(b"row_key4", b"row_key9") row_range2 = RowRange(b"row_key4", b"row_key9") @@ -83,7 +83,7 @@ def test_row_set__eq__len_row_ranges_differ(): def test_row_set__eq__row_keys_differ(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set1 = RowSet() row_set2 = RowSet() @@ -99,8 +99,8 @@ def test_row_set__eq__row_keys_differ(): def test_row_set__eq__row_ranges_differ(): - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_range1 = RowRange(b"row_key4", b"row_key9") row_range2 = RowRange(b"row_key14", b"row_key19") @@ -119,8 +119,8 @@ def test_row_set__eq__row_ranges_differ(): def test_row_set__ne__(): - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -139,8 +139,8 @@ def test_row_set__ne__(): def test_row_set__ne__same_value(): - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -159,7 +159,7 @@ def test_row_set__ne__same_value(): def test_row_set_add_row_key(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() row_set.add_row_key("row_key1") @@ -168,8 +168,8 @@ def test_row_set_add_row_key(): def test_row_set_add_row_range(): - from google.cloud.bigtable.row_set import RowRange - 
from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() row_range1 = RowRange(b"row_key1", b"row_key9") @@ -181,7 +181,7 @@ def test_row_set_add_row_range(): def test_row_set_add_row_range_from_keys(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() row_set.add_row_range_from_keys( @@ -194,7 +194,7 @@ def test_row_set_add_row_range_from_keys(): def test_row_set_add_row_range_with_prefix(): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() row_set.add_row_range_with_prefix("row") @@ -203,8 +203,8 @@ def test_row_set_add_row_range_with_prefix(): def test_row_set__update_message_request(): from google.cloud._helpers import _to_bytes - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet row_set = RowSet() table_name = "table_name" @@ -224,7 +224,7 @@ def test_row_set__update_message_request(): def test_row_range_constructor(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = "row_key1" end_key = "row_key9" @@ -236,7 +236,7 @@ def test_row_range_constructor(): def test_row_range___hash__set_equality(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange row_range1 = RowRange("row_key1", "row_key9") row_range2 = RowRange("row_key1", "row_key9") @@ -246,7 +246,7 @@ def test_row_range___hash__set_equality(): def test_row_range___hash__not_equals(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange row_range1 = RowRange("row_key1", "row_key9") row_range2 = RowRange("row_key1", "row_key19") @@ -256,7 +256,7 @@ def test_row_range___hash__not_equals(): def test_row_range__eq__(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -266,7 +266,7 @@ def test_row_range__eq__(): def test_row_range___eq__type_differ(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -276,7 +276,7 @@ def test_row_range___eq__type_differ(): def test_row_range__ne__(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -286,7 +286,7 @@ def test_row_range__ne__(): def test_row_range__ne__same_value(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -296,7 +296,7 @@ def test_row_range__ne__same_value(): def test_row_range_get_range_kwargs_closed_open(): - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -307,7 +307,7 @@ def test_row_range_get_range_kwargs_closed_open(): def test_row_range_get_range_kwargs_open_closed(): - from google.cloud.bigtable.row_set import RowRange + from 
google.cloud.bigtable.deprecated.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" diff --git a/tests/unit/test_table.py b/tests/unit/v2_client/test_table.py similarity index 91% rename from tests/unit/test_table.py rename to tests/unit/v2_client/test_table.py index e66a8f0f6..b885c2e5c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/v2_client/test_table.py @@ -50,11 +50,11 @@ STATUS_INTERNAL = StatusCode.INTERNAL.value[0] -@mock.patch("google.cloud.bigtable.table._MAX_BULK_MUTATIONS", new=3) +@mock.patch("google.cloud.bigtable.deprecated.table._MAX_BULK_MUTATIONS", new=3) def test__compile_mutation_entries_w_too_many_mutations(): - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import TooManyMutationsError - from google.cloud.bigtable.table import _compile_mutation_entries + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import TooManyMutationsError + from google.cloud.bigtable.deprecated.table import _compile_mutation_entries table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -72,8 +72,8 @@ def test__compile_mutation_entries_w_too_many_mutations(): def test__compile_mutation_entries_normal(): - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import _compile_mutation_entries + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import _compile_mutation_entries from google.cloud.bigtable_v2.types import MutateRowsRequest from google.cloud.bigtable_v2.types import data @@ -109,9 +109,9 @@ def test__compile_mutation_entries_normal(): def test__check_row_table_name_w_wrong_table_name(): - from google.cloud.bigtable.table import _check_row_table_name - from google.cloud.bigtable.table import TableMismatchError - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.table import _check_row_table_name + from google.cloud.bigtable.deprecated.table import TableMismatchError + from google.cloud.bigtable.deprecated.row import DirectRow table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -122,8 +122,8 @@ def test__check_row_table_name_w_wrong_table_name(): def test__check_row_table_name_w_right_table_name(): - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import _check_row_table_name + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import _check_row_table_name table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -133,8 +133,8 @@ def test__check_row_table_name_w_right_table_name(): def test__check_row_type_w_wrong_row_type(): - from google.cloud.bigtable.row import ConditionalRow - from google.cloud.bigtable.table import _check_row_type + from google.cloud.bigtable.deprecated.row import ConditionalRow + from google.cloud.bigtable.deprecated.table import _check_row_type row = ConditionalRow(row_key=b"row_key", table="table", filter_=None) with pytest.raises(TypeError): @@ -142,21 +142,21 @@ def test__check_row_type_w_wrong_row_type(): def test__check_row_type_w_right_row_type(): - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import _check_row_type + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import _check_row_type row = DirectRow(row_key=b"row_key", table="table") assert not _check_row_type(row) def 
_make_client(*args, **kwargs): - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client return Client(*args, **kwargs) def _make_table(*args, **kwargs): - from google.cloud.bigtable.table import Table + from google.cloud.bigtable.deprecated.table import Table return Table(*args, **kwargs) @@ -219,7 +219,7 @@ def _table_row_methods_helper(): def test_table_row_factory_direct(): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow table, row_key = _table_row_methods_helper() with warnings.catch_warnings(record=True) as warned: @@ -234,7 +234,7 @@ def test_table_row_factory_direct(): def test_table_row_factory_conditional(): - from google.cloud.bigtable.row import ConditionalRow + from google.cloud.bigtable.deprecated.row import ConditionalRow table, row_key = _table_row_methods_helper() filter_ = object() @@ -251,7 +251,7 @@ def test_table_row_factory_conditional(): def test_table_row_factory_append(): - from google.cloud.bigtable.row import AppendRow + from google.cloud.bigtable.deprecated.row import AppendRow table, row_key = _table_row_methods_helper() @@ -278,7 +278,7 @@ def test_table_row_factory_failure(): def test_table_direct_row(): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow table, row_key = _table_row_methods_helper() row = table.direct_row(row_key) @@ -289,7 +289,7 @@ def test_table_direct_row(): def test_table_conditional_row(): - from google.cloud.bigtable.row import ConditionalRow + from google.cloud.bigtable.deprecated.row import ConditionalRow table, row_key = _table_row_methods_helper() filter_ = object() @@ -301,7 +301,7 @@ def test_table_conditional_row(): def test_table_append_row(): - from google.cloud.bigtable.row import AppendRow + from google.cloud.bigtable.deprecated.row import AppendRow table, row_key = _table_row_methods_helper() row = table.append_row(row_key) @@ -357,7 +357,7 @@ def _create_table_helper(split_keys=[], column_families={}): from google.cloud.bigtable_admin_v2.types import ( bigtable_table_admin as table_admin_messages_v2_pb2, ) - from google.cloud.bigtable.column_family import ColumnFamily + from google.cloud.bigtable.deprecated.column_family import ColumnFamily credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -391,7 +391,7 @@ def test_table_create(): def test_table_create_with_families(): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule families = {"family": MaxVersionsGCRule(5)} _create_table_helper(column_families=families) @@ -404,7 +404,7 @@ def test_table_create_with_split_keys(): def test_table_exists_hit(): from google.cloud.bigtable_admin_v2.types import ListTablesResponse from google.cloud.bigtable_admin_v2.types import Table - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -426,7 +426,7 @@ def test_table_exists_hit(): def test_table_exists_miss(): from google.api_core.exceptions import NotFound - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -447,7 +447,7 @@ def 
test_table_exists_miss(): def test_table_exists_error(): from google.api_core.exceptions import BadRequest - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -512,8 +512,8 @@ def test_table_list_column_families(): def test_table_get_cluster_states(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.table import ClusterState INITIALIZING = enum_table.ReplicationState.INITIALIZING PLANNED_MAINTENANCE = enum_table.ReplicationState.PLANNED_MAINTENANCE @@ -557,10 +557,10 @@ def test_table_get_cluster_states(): def test_table_get_encryption_info(): from google.rpc.code_pb2 import Code - from google.cloud.bigtable.encryption_info import EncryptionInfo - from google.cloud.bigtable.enums import EncryptionInfo as enum_crypto - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.error import Status + from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo + from google.cloud.bigtable.deprecated.enums import EncryptionInfo as enum_crypto + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.error import Status ENCRYPTION_TYPE_UNSPECIFIED = enum_crypto.EncryptionType.ENCRYPTION_TYPE_UNSPECIFIED GOOGLE_DEFAULT_ENCRYPTION = enum_crypto.EncryptionType.GOOGLE_DEFAULT_ENCRYPTION @@ -640,9 +640,9 @@ def _make_data_api(): def _table_read_row_helper(chunks, expected_result, app_profile_id=None): from google.cloud._testing import _Monkey - from google.cloud.bigtable import table as MUT - from google.cloud.bigtable.row_set import RowSet - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated import table as MUT + from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -704,8 +704,8 @@ def test_table_read_row_miss_no_chunks_in_response(): def test_table_read_row_complete(): - from google.cloud.bigtable.row_data import Cell - from google.cloud.bigtable.row_data import PartialRowData + from google.cloud.bigtable.deprecated.row_data import Cell + from google.cloud.bigtable.deprecated.row_data import PartialRowData app_profile_id = "app-profile-id" chunk = _ReadRowsResponseCellChunkPB( @@ -768,7 +768,7 @@ def _table_mutate_rows_helper( mutation_timeout=None, app_profile_id=None, retry=None, timeout=None ): from google.rpc.status_pb2 import Status - from google.cloud.bigtable.table import DEFAULT_RETRY + from google.cloud.bigtable.deprecated.table import DEFAULT_RETRY credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -787,7 +787,7 @@ def _table_mutate_rows_helper( response = [Status(code=0), Status(code=1)] instance_mock = mock.Mock(return_value=response) klass_mock = mock.patch( - "google.cloud.bigtable.table._RetryableMutateRowsWorker", + "google.cloud.bigtable.deprecated.table._RetryableMutateRowsWorker", new=mock.MagicMock(return_value=instance_mock), ) @@ -854,9 +854,9 @@ def test_table_mutate_rows_w_mutation_timeout_and_timeout_arg(): def 
test_table_read_rows(): from google.cloud._testing import _Monkey - from google.cloud.bigtable.row_data import PartialRowsData - from google.cloud.bigtable import table as MUT - from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.deprecated.row_data import PartialRowsData + from google.cloud.bigtable.deprecated import table as MUT + from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1017,7 +1017,7 @@ def test_table_read_retry_rows_no_full_table_scan(): def test_table_yield_retry_rows(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1079,9 +1079,9 @@ def test_table_yield_retry_rows(): def test_table_yield_rows_with_row_set(): - from google.cloud.bigtable.row_set import RowSet - from google.cloud.bigtable.row_set import RowRange - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.deprecated.table import _create_row_request credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1174,7 +1174,7 @@ def test_table_truncate(): table = _make_table(TABLE_ID, instance) table_api = client._table_admin_client = _make_table_api() - with mock.patch("google.cloud.bigtable.table.Table.name", new=TABLE_NAME): + with mock.patch("google.cloud.bigtable.deprecated.table.Table.name", new=TABLE_NAME): result = table.truncate() assert result is None @@ -1255,7 +1255,7 @@ def test_table_mutations_batcher_factory(): def test_table_get_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1286,8 +1286,8 @@ def test_table_get_iam_policy(): def test_table_set_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1349,7 +1349,7 @@ def test_table_test_iam_permissions(): def test_table_backup_factory_defaults(): - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup instance = _make_table(INSTANCE_ID, None) table = _make_table(TABLE_ID, instance) @@ -1373,8 +1373,8 @@ def test_table_backup_factory_defaults(): def test_table_backup_factory_non_defaults(): import datetime from google.cloud._helpers import UTC - from google.cloud.bigtable.backup import Backup - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.deprecated.instance import Instance instance = Instance(INSTANCE_ID, None) table = _make_table(TABLE_ID, instance) @@ -1404,7 +1404,7 @@ def 
_table_list_backups_helper(cluster_id=None, filter_=None, **kwargs): Backup as backup_pb, bigtable_table_admin, ) - from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.deprecated.backup import Backup client = _make_client( project=PROJECT_ID, credentials=_make_credentials(), admin=True @@ -1466,7 +1466,7 @@ def test_table_list_backups_w_options(): def _table_restore_helper(backup_name=None): - from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.deprecated.instance import Instance op_future = object() credentials = _make_credentials() @@ -1502,7 +1502,7 @@ def test_table_restore_table_w_backup_name(): def _make_worker(*args, **kwargs): - from google.cloud.bigtable.table import _RetryableMutateRowsWorker + from google.cloud.bigtable.deprecated.table import _RetryableMutateRowsWorker return _RetryableMutateRowsWorker(*args, **kwargs) @@ -1543,7 +1543,7 @@ def test_rmrw_callable_empty_rows(): def test_rmrw_callable_no_retry_strategy(): - from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.deprecated.row import DirectRow # Setup: # - Mutate 3 rows. @@ -1585,8 +1585,8 @@ def test_rmrw_callable_no_retry_strategy(): def test_rmrw_callable_retry(): - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import DEFAULT_RETRY + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import DEFAULT_RETRY # Setup: # - Mutate 3 rows. @@ -1640,8 +1640,8 @@ def _do_mutate_retryable_rows_helper( mutate_rows_side_effect=None, ): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.row import DirectRow - from google.cloud.bigtable.table import _BigtableRetryableError + from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.deprecated.table import _BigtableRetryableError from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 # Setup: @@ -1797,7 +1797,7 @@ def test_rmrw_do_mutate_retryable_rows_w_retryable_error_internal_rst_stream_err # Raise internal server error with RST STREAM error messages # There should be no error raised and that the request is retried from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.row_data import RETRYABLE_INTERNAL_ERROR_MESSAGES + from google.cloud.bigtable.deprecated.row_data import RETRYABLE_INTERNAL_ERROR_MESSAGES row_cells = [ (b"row_key_1", ("cf", b"col", b"value1")), @@ -2003,7 +2003,7 @@ def test_rmrw_do_mutate_retryable_rows_mismatch_num_responses(): def test__create_row_request_table_name_only(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request table_name = "table_name" result = _create_row_request(table_name) @@ -2012,14 +2012,14 @@ def test__create_row_request_table_name_only(): def test__create_row_request_row_range_row_set_conflict(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request with pytest.raises(ValueError): _create_row_request(None, end_key=object(), row_set=object()) def test__create_row_request_row_range_start_key(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2032,7 +2032,7 @@ def test__create_row_request_row_range_start_key(): def 
test__create_row_request_row_range_end_key(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2045,7 +2045,7 @@ def test__create_row_request_row_range_end_key(): def test__create_row_request_row_range_both_keys(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2059,7 +2059,7 @@ def test__create_row_request_row_range_both_keys(): def test__create_row_request_row_range_both_keys_inclusive(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2075,8 +2075,8 @@ def test__create_row_request_row_range_both_keys_inclusive(): def test__create_row_request_with_filter(): - from google.cloud.bigtable.table import _create_row_request - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter table_name = "table_name" row_filter = RowSampleFilter(0.33) @@ -2088,7 +2088,7 @@ def test__create_row_request_with_filter(): def test__create_row_request_with_limit(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request table_name = "table_name" limit = 1337 @@ -2098,8 +2098,8 @@ def test__create_row_request_with_limit(): def test__create_row_request_with_row_set(): - from google.cloud.bigtable.table import _create_row_request - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.deprecated.row_set import RowSet table_name = "table_name" row_set = RowSet() @@ -2109,7 +2109,7 @@ def test__create_row_request_with_row_set(): def test__create_row_request_with_app_profile_id(): - from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.deprecated.table import _create_row_request table_name = "table_name" limit = 1337 @@ -2128,8 +2128,8 @@ def _ReadRowsRequestPB(*args, **kw): def test_cluster_state___eq__(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2138,8 +2138,8 @@ def test_cluster_state___eq__(): def test_cluster_state___eq__type_differ(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2148,8 +2148,8 @@ def test_cluster_state___eq__type_differ(): def test_cluster_state___ne__same_value(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from 
google.cloud.bigtable.deprecated.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2158,8 +2158,8 @@ def test_cluster_state___ne__same_value(): def test_cluster_state___ne__(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.table import ClusterState READY = enum_table.ReplicationState.READY INITIALIZING = enum_table.ReplicationState.INITIALIZING @@ -2169,8 +2169,8 @@ def test_cluster_state___ne__(): def test_cluster_state__repr__(): - from google.cloud.bigtable.enums import Table as enum_table - from google.cloud.bigtable.table import ClusterState + from google.cloud.bigtable.deprecated.enums import Table as enum_table + from google.cloud.bigtable.deprecated.table import ClusterState STATE_NOT_KNOWN = enum_table.ReplicationState.STATE_NOT_KNOWN INITIALIZING = enum_table.ReplicationState.INITIALIZING From 5ea2bc3e0e1486bb1ad98241fca6e9817a332f5d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 13:33:06 -0800 Subject: [PATCH 006/349] updated system tests --- tests/system/__init__.py | 2 +- tests/system/v2_client/__init__.py | 15 ++++++++++++++ tests/system/{ => v2_client}/_helpers.py | 0 tests/system/{ => v2_client}/conftest.py | 2 +- tests/system/{ => v2_client}/test_data_api.py | 20 +++++++++---------- .../{ => v2_client}/test_instance_admin.py | 6 +++--- .../{ => v2_client}/test_table_admin.py | 12 +++++------ 7 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 tests/system/v2_client/__init__.py rename tests/system/{ => v2_client}/_helpers.py (100%) rename tests/system/{ => v2_client}/conftest.py (98%) rename tests/system/{ => v2_client}/test_data_api.py (94%) rename tests/system/{ => v2_client}/test_instance_admin.py (99%) rename tests/system/{ => v2_client}/test_table_admin.py (96%) diff --git a/tests/system/__init__.py b/tests/system/__init__.py index 4de65971c..89a37dc92 100644 --- a/tests/system/__init__.py +++ b/tests/system/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/system/v2_client/__init__.py b/tests/system/v2_client/__init__.py new file mode 100644 index 000000000..4de65971c --- /dev/null +++ b/tests/system/v2_client/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/tests/system/_helpers.py b/tests/system/v2_client/_helpers.py similarity index 100% rename from tests/system/_helpers.py rename to tests/system/v2_client/_helpers.py diff --git a/tests/system/conftest.py b/tests/system/v2_client/conftest.py similarity index 98% rename from tests/system/conftest.py rename to tests/system/v2_client/conftest.py index f39fcba88..bb4f54b41 100644 --- a/tests/system/conftest.py +++ b/tests/system/v2_client/conftest.py @@ -17,7 +17,7 @@ import pytest from test_utils.system import unique_resource_id -from google.cloud.bigtable.client import Client +from google.cloud.bigtable.deprecated.client import Client from google.cloud.environment_vars import BIGTABLE_EMULATOR from . import _helpers diff --git a/tests/system/test_data_api.py b/tests/system/v2_client/test_data_api.py similarity index 94% rename from tests/system/test_data_api.py rename to tests/system/v2_client/test_data_api.py index 2ca7e1504..551a221ee 100644 --- a/tests/system/test_data_api.py +++ b/tests/system/v2_client/test_data_api.py @@ -60,7 +60,7 @@ def rows_to_delete(): def test_table_read_rows_filter_millis(data_table): - from google.cloud.bigtable import row_filters + from google.cloud.bigtable.deprecated import row_filters end = datetime.datetime.now() start = end - datetime.timedelta(minutes=60) @@ -158,8 +158,8 @@ def test_table_drop_by_prefix(data_table, rows_to_delete): def test_table_read_rows_w_row_set(data_table, rows_to_delete): - from google.cloud.bigtable.row_set import RowSet - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange row_keys = [ b"row_key_1", @@ -189,7 +189,7 @@ def test_table_read_rows_w_row_set(data_table, rows_to_delete): def test_rowset_add_row_range_w_pfx(data_table, rows_to_delete): - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowSet row_keys = [ b"row_key_1", @@ -234,7 +234,7 @@ def _write_to_row(row1, row2, row3, row4): from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import UTC - from google.cloud.bigtable.row_data import Cell + from google.cloud.bigtable.deprecated.row_data import Cell timestamp1 = datetime.datetime.utcnow().replace(tzinfo=UTC) timestamp1_micros = _microseconds_from_datetime(timestamp1) @@ -290,7 +290,7 @@ def test_table_read_row(data_table, rows_to_delete): def test_table_read_rows(data_table, rows_to_delete): - from google.cloud.bigtable.row_data import PartialRowData + from google.cloud.bigtable.deprecated.row_data import PartialRowData row = data_table.direct_row(ROW_KEY) rows_to_delete.append(row) @@ -326,10 +326,10 @@ def test_table_read_rows(data_table, rows_to_delete): def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator): - from google.cloud.bigtable.row_filters import ApplyLabelFilter - from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowFilterUnion + from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.deprecated.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.deprecated.row_filters import RowFilterChain + from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion row = 
data_table.direct_row(ROW_KEY) rows_to_delete.append(row) diff --git a/tests/system/test_instance_admin.py b/tests/system/v2_client/test_instance_admin.py similarity index 99% rename from tests/system/test_instance_admin.py rename to tests/system/v2_client/test_instance_admin.py index e5e311213..debe1ab56 100644 --- a/tests/system/test_instance_admin.py +++ b/tests/system/v2_client/test_instance_admin.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud.bigtable import enums -from google.cloud.bigtable.table import ClusterState +from google.cloud.bigtable.deprecated import enums +from google.cloud.bigtable.deprecated.table import ClusterState from . import _helpers @@ -149,7 +149,7 @@ def test_instance_create_prod( instances_to_delete, skip_on_emulator, ): - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums alt_instance_id = f"ndef{unique_suffix}" instance = admin_client.instance(alt_instance_id, labels=instance_labels) diff --git a/tests/system/test_table_admin.py b/tests/system/v2_client/test_table_admin.py similarity index 96% rename from tests/system/test_table_admin.py rename to tests/system/v2_client/test_table_admin.py index c50189013..107ed41bf 100644 --- a/tests/system/test_table_admin.py +++ b/tests/system/v2_client/test_table_admin.py @@ -97,7 +97,7 @@ def test_table_create_w_families( data_instance_populated, tables_to_delete, ): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule temp_table_id = "test-create-table-with-failies" column_family_id = "col-fam-id1" @@ -134,7 +134,7 @@ def test_table_create_w_split_keys( def test_column_family_create(data_instance_populated, tables_to_delete): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule temp_table_id = "test-create-column-family" temp_table = data_instance_populated.table(temp_table_id) @@ -158,7 +158,7 @@ def test_column_family_create(data_instance_populated, tables_to_delete): def test_column_family_update(data_instance_populated, tables_to_delete): - from google.cloud.bigtable.column_family import MaxVersionsGCRule + from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule temp_table_id = "test-update-column-family" temp_table = data_instance_populated.table(temp_table_id) @@ -219,8 +219,8 @@ def test_table_get_iam_policy( def test_table_set_iam_policy( service_account, data_instance_populated, tables_to_delete, skip_on_emulator ): - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated.policy import Policy temp_table_id = "test-set-iam-policy-table" temp_table = data_instance_populated.table(temp_table_id) @@ -264,7 +264,7 @@ def test_table_backup( skip_on_emulator, ): from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import enums temp_table_id = "test-backup-table" temp_table = data_instance_populated.table(temp_table_id) From 84fd9c3cf5619f387a60bc612de190942cf74856 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 13:52:34 -0800 Subject: [PATCH 007/349] ran blacken --- google/cloud/bigtable/client.py | 230 
++++++++++++------------ tests/unit/v2_client/test_client.py | 8 +- tests/unit/v2_client/test_row_merger.py | 6 +- tests/unit/v2_client/test_table.py | 8 +- 4 files changed, 135 insertions(+), 117 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 19edc4ff4..4cb5ccdc5 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,16 +15,21 @@ from __future__ import annotations +from typing import Any, AsyncIterable + from google.cloud.client import ClientWithProject + class BigtableDataClient(ClientWithProject): def __init__( self, *, - project: str|None = None, - credentials: google.auth.credentials.Credentials|None = None, - client_options: dict[str, Any] | "google.api_core.client_options.ClientOptions" | None = None, - metadata: list[tuple[str, str]]|None = None, + project: str | None = None, + credentials: google.auth.credentials.Credentials | None = None, + client_options: dict[str, Any] + | "google.api_core.client_options.ClientOptions" + | None = None, + metadata: list[tuple[str, str]] | None = None, ): """ Create a client instance @@ -34,47 +39,49 @@ def __init__( """ pass - - def get_table(self, instance_id:str, table_id:str, app_profile_id:str|None=None) -> Table: + def get_table( + self, instance_id: str, table_id: str, app_profile_id: str | None = None + ) -> Table: return Table(self, instance_id, table_id, app_profile_id) -class Table(): + +class Table: """ Main Data API surface - Table object maintains instance_id, table_id, and app_profile_id context, and passes them with + Table object maintains instance_id, table_id, and app_profile_id context, and passes them with each call """ def __init__( self, - client:BigtableDataClient, - instance_id:str, + client: BigtableDataClient, + instance_id: str, table_id: str, - app_profile_id:str|None=None + app_profile_id: str | None = None, ): pass async def read_rows_stream( self, - query: ReadRowsQuery|dict, + query: ReadRowsQuery | dict, *, - shard:bool=False, - limit:int|None, - cache_size_limit:int|None=None, - operation_timeout:int|float|None=60, - per_row_timeout:int|float|None=10, - idle_timeout:int|float|None=300, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + shard: bool = False, + limit: int | None, + cache_size_limit: int | None = None, + operation_timeout: int | float | None = 60, + per_row_timeout: int | float | None = 10, + idle_timeout: int | float | None = 300, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> AsyncIterable[RowResponse]: """ Returns a generator to asynchronously stream back row data. Failed requests within operation_timeout and operation_deadline policies will be retried. - By default, row data is streamed eagerly over the network, and fully cached in memory - in the generator, which can be consumed as needed. The size of the generator cache can + By default, row data is streamed eagerly over the network, and fully cached in memory + in the generator, which can be consumed as needed. The size of the generator cache can be configured with cache_size_limit. When the cache is full, the read_rows_stream will pause the network stream until space is available @@ -82,33 +89,33 @@ async def read_rows_stream( - query: contains details about which rows to return - shard: if True, will attempt to split up and distribute query to multiple backend nodes in parallel - - limit: a limit on the number of rows to return. 
Actual limit will be + - limit: a limit on the number of rows to return. Actual limit will be min(limit, query.limit) - - cache_size: the number of rows to cache in memory. If None, no limits. + - cache_size: the number of rows to cache in memory. If None, no limits. Defaults to None - - operation_timeout: the time budget for the entire operation, in seconds. + - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. - time is only counted while actively waiting on the network. + time is only counted while actively waiting on the network. Completed and cached results can still be accessed after the deadline is complete, with a DeadlineExceeded exception only raised after cached results are exhausted - per_row_timeout: the time budget for a single row read, in seconds. If a row takes - longer than per_row_timeout to complete, the ongoing network request will be with a + longer than per_row_timeout to complete, the ongoing network request will be with a DeadlineExceeded exception, and a retry may be attempted Applies only to the underlying network call. - - idle_timeout: the number of idle seconds before an active generator is marked as - stale and the cache is drained. The idle count is reset each time the generator + - idle_timeout: the number of idle seconds before an active generator is marked as + stale and the cache is drained. The idle count is reset each time the generator is yielded from raises DeadlineExceeded on future yields - - per_request_timeout: the time budget for an individual network request, in seconds. - If it takes longer than this time to complete, the request will be cancelled with + - per_request_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted - metadata: Strings which should be sent along with the request as metadata headers. 
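        A minimal consumption sketch for the proposed streaming API (illustrative
        only; the method body above is unimplemented, and the row keys, variable
        names, and helper function below are placeholders):

            from google.cloud.bigtable.read_rows_query import ReadRowsQuery

            async def print_keys(table):
                query = ReadRowsQuery(row_keys=[b"row-1", b"row-2"])
                # rows are yielded as they arrive; the generator caches rows in
                # memory, bounded by cache_size_limit when one is set
                stream = await table.read_rows_stream(query, limit=None)
                async for row in stream:
                    print(row.row_key)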
- + Returns: - an asynchronous generator that yields rows returned by the query Raises: - DeadlineExceeded: raised after operation timeout - will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions from any retries that failed - IdleTimeout: if generator was abandoned """ @@ -116,14 +123,14 @@ async def read_rows_stream( async def read_rows( self, - query: ReadRowsQuery|dict, + query: ReadRowsQuery | dict, *, - shard:bool=False, - limit:int|None, - operation_timeout:int|float|None=60, - per_row_timeout:int|float|None=10, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + shard: bool = False, + limit: int | None, + operation_timeout: int | float | None = 60, + per_row_timeout: int | float | None = 10, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> List[RowResponse]: """ Helper function that returns a full list instead of a generator @@ -137,11 +144,11 @@ async def read_rows( async def read_row( self, - row_key:str|bytes, + row_key: str | bytes, *, - operation_timeout:int|float|None=60, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> RowResponse: """ Helper function to return a single row @@ -155,15 +162,15 @@ async def read_row( async def read_rows_sharded( self, - query_list: list[ReadRowsQuery]|list[dict], + query_list: list[ReadRowsQuery] | list[dict], *, - limit:int|None, - cache_size_limit:int|None=None, - operation_timeout:int|float|None=60, - per_row_timeout:int|float|None=10, - idle_timeout:int|float|None=300, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + limit: int | None, + cache_size_limit: int | None = None, + operation_timeout: int | float | None = 60, + per_row_timeout: int | float | None = 10, + idle_timeout: int | float | None = 300, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> AsyncIterable[RowResponse]: """ Runs a sharded query in parallel @@ -178,11 +185,11 @@ async def read_rows_sharded( async def row_exists( self, - row_key:str|bytes, + row_key: str | bytes, *, - operation_timeout:int|float|None=60, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> bool: """ Helper function to determine if a row exists @@ -194,15 +201,13 @@ async def row_exists( """ pass - - async def sample_keys( self, *, - operation_timeout:int|float|None=60, - per_sample_timeout:int|float|None=10, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + per_sample_timeout: int | float | None = 10, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> RowKeySamples: """ Return a set of RowKeySamples that delimit contiguous sections of the table of @@ -219,16 +224,16 @@ async def sample_keys( - a set of RowKeySamples the delimit contiguous sections of the table Raises: - DeadlineExceeded: raised after operation timeout - will be chained with a RetryExceptionGroup containing all GoogleAPIError + 
will be chained with a RetryExceptionGroup containing all GoogleAPIError exceptions from any retries that failed """ pass def mutations_batcher(self, **kwargs) -> MutationsBatcher: """ - Returns a new mutations batcher instance. + Returns a new mutations batcher instance. - Can be used to iteratively add mutations that are flushed as a group, + Can be used to iteratively add mutations that are flushed as a group, to avoid excess network calls Returns: @@ -238,41 +243,41 @@ def mutations_batcher(self, **kwargs) -> MutationsBatcher: async def mutate_row( self, - row_key: str|bytes, - mutations: List[Mutation]|Mutation, + row_key: str | bytes, + mutations: List[Mutation] | Mutation, *, - operation_timeout:int|float|None=60, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ): """ - Mutates a row atomically. + Mutates a row atomically. - Cells already present in the row are left unchanged unless explicitly changed - by ``mutation``. + Cells already present in the row are left unchanged unless explicitly changed + by ``mutation``. - Idempotent operations (i.e, all mutations have an explicit timestamp) will be - retried on server failure. Non-idempotent operations will not. + Idempotent operations (i.e, all mutations have an explicit timestamp) will be + retried on server failure. Non-idempotent operations will not. - Args: - - row_key: the row to apply mutations to - - mutations: the set of mutations to apply to the row - - operation_timeout: the time budget for the entire operation, in seconds. - Failed requests will be retried within the budget. - time is only counted while actively waiting on the network. - DeadlineExceeded exception raised after timeout - - per_request_timeout: the time budget for an individual network request, - in seconds. If it takes longer than this time to complete, the request - will be cancelled with a DeadlineExceeded exception, and a retry will be - attempted if within operation_timeout budget - - metadata: Strings which should be sent along with the request as metadata headers. + Args: + - row_key: the row to apply mutations to + - mutations: the set of mutations to apply to the row + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + time is only counted while actively waiting on the network. + DeadlineExceeded exception raised after timeout + - per_request_timeout: the time budget for an individual network request, + in seconds. If it takes longer than this time to complete, the request + will be cancelled with a DeadlineExceeded exception, and a retry will be + attempted if within operation_timeout budget + - metadata: Strings which should be sent along with the request as metadata headers. - Raises: - - DeadlineExceeded: raised after operation timeout - will be chained with a RetryExceptionGroup containing all - GoogleAPIError exceptions from any retries that failed - - GoogleAPIError: raised on non-idempotent operations that cannot be - safely retried. + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing all + GoogleAPIError exceptions from any retries that failed + - GoogleAPIError: raised on non-idempotent operations that cannot be + safely retried. 
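        A minimal call sketch for the proposed mutate_row API (illustrative only;
        the method body above is unimplemented, and the family, qualifier, and
        row key values are placeholders):

            from google.cloud.bigtable.mutations import SetCell

            async def mark_ok(table):
                mutation = SetCell(family="stats", qualifier=b"status", new_value=b"ok")
                # idempotent (and therefore retryable) only if an explicit
                # timestamp_ms is supplied on the SetCell
                await table.mutate_row(b"row-key-1", mutation)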
""" pass @@ -280,9 +285,9 @@ async def bulk_mutate_rows( self, mutation_entries: list[BulkMutationsEntry], *, - operation_timeout:int|float|None=60, - per_request_timeout:int|float|None=None, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + per_request_timeout: int | float | None = None, + metadata: list[tuple[str, str]] | None = None, ): """ Applies mutations for multiple rows in a single batched request. @@ -292,13 +297,13 @@ async def bulk_mutate_rows( In total, the row_mutations can contain at most 100000 individual mutations across all entries - Idempotent entries (i.e., entries with mutations with explicit timestamps) - will be retried on failure. Non-idempotent will not, and will reported in a + Idempotent entries (i.e., entries with mutations with explicit timestamps) + will be retried on failure. Non-idempotent will not, and will reported in a raised exception group Args: - mutation_entries: the batches of mutations to apply - Each entry will be applied atomically, but entries will be applied + Each entry will be applied atomically, but entries will be applied in arbitrary order - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. @@ -318,12 +323,12 @@ async def bulk_mutate_rows( async def check_and_mutate_row( self, - row_key: str|bytes, - predicate: RowFilter|None, - true_case_mutations: Mutation | list[Mutation] | None = None, + row_key: str | bytes, + predicate: RowFilter | None, + true_case_mutations: Mutation | list[Mutation] | None = None, false_case_mutations: Mutation | list[Mutation] | None = None, - operation_timeout:int|float|None=60, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + metadata: list[tuple[str, str]] | None = None, ) -> bool: """ Mutates a row atomically based on the output of a predicate filter @@ -332,9 +337,9 @@ async def check_and_mutate_row( Args: - row_key: the key of the row to mutate - - predicate: the filter to be applied to the contents of the specified row. - Depending on whether or not any results are yielded, - either true_case_mutations or false_case_mutations will be executed. + - predicate: the filter to be applied to the contents of the specified row. + Depending on whether or not any results are yielded, + either true_case_mutations or false_case_mutations will be executed. If None, checks that the row contains any values at all. - true_case_mutations: Changes to be atomically applied to the specified row if @@ -362,17 +367,17 @@ async def check_and_mutate_row( async def read_modify_write_row( self, - row_key: str|bytes, - rules: ReadModifyWriteRule|list[ReadModifyWriteRule]|dict|list[dict], + row_key: str | bytes, + rules: ReadModifyWriteRule | list[ReadModifyWriteRule] | dict | list[dict], *, - operation_timeout:int|float|None=60, - metadata: list[tuple[str, str]]|None = None, + operation_timeout: int | float | None = 60, + metadata: list[tuple[str, str]] | None = None, ) -> RowResponse: """ Reads and modifies a row atomically according to input ReadModifyWriteRules, and returns the contents of all modified cells - The new value for the timestamp is the greater of the existing timestamp or + The new value for the timestamp is the greater of the existing timestamp or the current server time. 
Non-idempotent operation: will not be retried @@ -393,6 +398,7 @@ async def read_modify_write_row( """ pass + if __name__ == "__main__": client = BigtableDataClient() client.get_table("instance_id", "table_id") diff --git a/tests/unit/v2_client/test_client.py b/tests/unit/v2_client/test_client.py index 82e70f8a6..fe42574a0 100644 --- a/tests/unit/v2_client/test_client.py +++ b/tests/unit/v2_client/test_client.py @@ -171,7 +171,9 @@ def test_client_constructor_w_both_admin_and_read_only(): def test_client_constructor_w_emulator_host(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.deprecated.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.deprecated.client import ( + _DEFAULT_BIGTABLE_EMULATOR_CLIENT, + ) from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" @@ -216,7 +218,9 @@ def test_client_constructor_w_emulator_host_w_project(): def test_client_constructor_w_emulator_host_w_credentials(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.deprecated.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.deprecated.client import ( + _DEFAULT_BIGTABLE_EMULATOR_CLIENT, + ) from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" diff --git a/tests/unit/v2_client/test_row_merger.py b/tests/unit/v2_client/test_row_merger.py index 8693634f8..26cedb34d 100644 --- a/tests/unit/v2_client/test_row_merger.py +++ b/tests/unit/v2_client/test_row_merger.py @@ -5,7 +5,11 @@ import proto import pytest -from google.cloud.bigtable.deprecated.row_data import PartialRowsData, PartialRowData, InvalidChunk +from google.cloud.bigtable.deprecated.row_data import ( + PartialRowsData, + PartialRowData, + InvalidChunk, +) from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable.deprecated.row_merger import _RowMerger diff --git a/tests/unit/v2_client/test_table.py b/tests/unit/v2_client/test_table.py index b885c2e5c..ad31e8bc9 100644 --- a/tests/unit/v2_client/test_table.py +++ b/tests/unit/v2_client/test_table.py @@ -1174,7 +1174,9 @@ def test_table_truncate(): table = _make_table(TABLE_ID, instance) table_api = client._table_admin_client = _make_table_api() - with mock.patch("google.cloud.bigtable.deprecated.table.Table.name", new=TABLE_NAME): + with mock.patch( + "google.cloud.bigtable.deprecated.table.Table.name", new=TABLE_NAME + ): result = table.truncate() assert result is None @@ -1797,7 +1799,9 @@ def test_rmrw_do_mutate_retryable_rows_w_retryable_error_internal_rst_stream_err # Raise internal server error with RST STREAM error messages # There should be no error raised and that the request is retried from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.deprecated.row_data import RETRYABLE_INTERNAL_ERROR_MESSAGES + from google.cloud.bigtable.deprecated.row_data import ( + RETRYABLE_INTERNAL_ERROR_MESSAGES, + ) row_cells = [ (b"row_key_1", ("cf", b"col", b"value1")), From 43b17dd0494e08b0ebabebc78e84c5ee190ca253 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 14:56:12 -0800 Subject: [PATCH 008/349] added additional unimplemented files --- google/cloud/bigtable/__init__.py | 8 +- google/cloud/bigtable/client.py | 39 +- google/cloud/bigtable/exceptions.py | 52 ++ google/cloud/bigtable/mutations.py | 54 ++ google/cloud/bigtable/mutations_batcher.py | 101 +++ 
.../cloud/bigtable/read_modify_write_rules.py | 35 + google/cloud/bigtable/read_rows_query.py | 52 ++ google/cloud/bigtable/row_filters.py | 838 ++++++++++++++++++ google/cloud/bigtable/row_response.py | 130 +++ 9 files changed, 1291 insertions(+), 18 deletions(-) create mode 100644 google/cloud/bigtable/exceptions.py create mode 100644 google/cloud/bigtable/mutations.py create mode 100644 google/cloud/bigtable/mutations_batcher.py create mode 100644 google/cloud/bigtable/read_modify_write_rules.py create mode 100644 google/cloud/bigtable/read_rows_query.py create mode 100644 google/cloud/bigtable/row_filters.py create mode 100644 google/cloud/bigtable/row_response.py diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index ff4b0cde2..ba5cc6898 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -18,6 +18,12 @@ from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.client import Table +from typing_extensions import TypeAlias +from typing import List, Tuple + __version__: str = package_version.__version__ -__all__ = ("BigtableDataClient", "Table") +# Type alias for the output of sample_keys +RowKeySamples: TypeAlias = List[Tuple[bytes, int]] + +__all__ = ("BigtableDataClient", "Table", "RowKeySamples") diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4cb5ccdc5..36ddeba6e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -18,7 +18,16 @@ from typing import Any, AsyncIterable from google.cloud.client import ClientWithProject +from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry +from google.cloud.bigtable.mutations_batcher import MutationsBatcher +from google.cloud.bigtable.row_response import RowResponse +from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable import RowKeySamples +from google.cloud.bigtable.row_filters import RowFilter +from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule +import google.auth.credentials.Credentials +import google.auth.credentials.ClientOptions class BigtableDataClient(ClientWithProject): def __init__( @@ -60,7 +69,7 @@ def __init__( table_id: str, app_profile_id: str | None = None, ): - pass + raise NotImplementedError async def read_rows_stream( self, @@ -119,7 +128,7 @@ async def read_rows_stream( from any retries that failed - IdleTimeout: if generator was abandoned """ - pass + raise NotImplementedError async def read_rows( self, @@ -131,7 +140,7 @@ async def read_rows( per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> List[RowResponse]: + ) -> list[RowResponse]: """ Helper function that returns a full list instead of a generator @@ -140,7 +149,7 @@ async def read_rows( Returns: - a list of the rows returned by the query """ - pass + raise NotImplementedError async def read_row( self, @@ -158,7 +167,7 @@ async def read_row( Returns: - the individual row requested """ - pass + raise NotImplementedError async def read_rows_sharded( self, @@ -181,7 +190,7 @@ async def read_rows_sharded( Args: - query_list: a list of queries to run in parallel """ - pass + raise NotImplementedError async def row_exists( self, @@ -199,7 +208,7 @@ async def row_exists( Returns: - a bool indicating whether the row exists """ - pass + raise NotImplementedError async def sample_keys( self, @@ -227,7 +236,7 @@ async def sample_keys( will 
be chained with a RetryExceptionGroup containing all GoogleAPIError exceptions from any retries that failed """ - pass + raise NotImplementedError def mutations_batcher(self, **kwargs) -> MutationsBatcher: """ @@ -244,7 +253,7 @@ def mutations_batcher(self, **kwargs) -> MutationsBatcher: async def mutate_row( self, row_key: str | bytes, - mutations: List[Mutation] | Mutation, + mutations: list[Mutation] | Mutation, *, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, @@ -279,7 +288,7 @@ async def mutate_row( - GoogleAPIError: raised on non-idempotent operations that cannot be safely retried. """ - pass + raise NotImplementedError async def bulk_mutate_rows( self, @@ -319,7 +328,7 @@ async def bulk_mutate_rows( - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions """ - pass + raise NotImplementedError async def check_and_mutate_row( self, @@ -363,7 +372,7 @@ async def check_and_mutate_row( Raises: - GoogleAPIError exceptions from grpc call """ - pass + raise NotImplementedError async def read_modify_write_row( self, @@ -396,9 +405,5 @@ async def read_modify_write_row( Raises: - GoogleAPIError exceptions from grpc call """ - pass - + raise NotImplementedError -if __name__ == "__main__": - client = BigtableDataClient() - client.get_table("instance_id", "table_id") diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py new file mode 100644 index 000000000..d277178e8 --- /dev/null +++ b/google/cloud/bigtable/exceptions.py @@ -0,0 +1,52 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys + + +class MutationsExceptionGroup(BigtableExceptionGroup): + """ + Represents one or more exceptions that occur during a bulk mutation operation + """ + pass + + +class RetryExceptionGroup(BigtableExceptionGroup): + """Represents one or more exceptions that occur during a retryable operation""" + pass + + +is_311_plus = sys.version_info >= (3, 11) + +class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore + """ + Represents one or more exceptions that occur during a bulk Bigtable operation + + In Python 3.11+, this is an unmodified exception group. 
In < 3.10, it is a + custom exception with some exception group functionality backported, but does + Not implement the full API + """ + + def __init__(self, message, excs): + if is_311_plus: + super().__init__(message, excs) + else: + self.exceptions = excs + revised_message = f"{message} ({len(excs)} sub-exceptions)" + for i in range(len(excs)): + revised_message += f"\n+------------- {i} --------------\n" + revised_message += f"| {type(excs[i]).__name__}: {str(excs[i])}" + revised_message += f"\n+-------------------------------" + super().__init__(revised_message) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py new file mode 100644 index 000000000..8f63b776d --- /dev/null +++ b/google/cloud/bigtable/mutations.py @@ -0,0 +1,54 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from dataclasses import dataclass +from google.cloud.bigtable.row_response import family_id, qualifier, row_key + + +class Mutation: + pass + + +@dataclass +class SetCell(Mutation): + family: family_id + qualifier: qualifier + new_value: bytes | str | int + timestamp_ms: int | None = None + + +@dataclass +class DeleteRangeFromColumn(Mutation): + family: family_id + qualifier: qualifier + start_timestamp_ms: int + end_timestamp_ms: int + + +@dataclass +class DeleteAllFromFamily(Mutation): + family_to_delete: family_id + + +@dataclass +class DeleteAllFromRow(Mutation): + pass + + +@dataclass +class BulkMutationsEntry: + row_key: row_key + mutations: list[Mutation] | Mutation diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py new file mode 100644 index 000000000..48bbf6a2e --- /dev/null +++ b/google/cloud/bigtable/mutations_batcher.py @@ -0,0 +1,101 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
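A sketch of how the mutation models defined above might be grouped for
Table.bulk_mutate_rows (illustrative only; the table object, row key, and
family names are placeholders):

    from google.cloud.bigtable.mutations import (
        BulkMutationsEntry,
        DeleteAllFromFamily,
        SetCell,
    )

    async def apply_batch(table):
        # each entry is applied atomically; entries may be applied in any order
        entry = BulkMutationsEntry(
            row_key=b"row-key-1",
            mutations=[
                SetCell(family="stats", qualifier=b"status", new_value=b"ok"),
                DeleteAllFromFamily(family_to_delete="staging"),
            ],
        )
        await table.bulk_mutate_rows([entry])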
+# +from __future__ import annotations + +import asyncio + +from google.cloud.bigtable.mutations import Mutation +from google.cloud.bigtable.row_response import row_key +from google.cloud.bigtable.row_filters import RowFilter +from google.cloud.bigtable import Table + + +class MutationsBatcher: + """ + Allows users to send batches using context manager API: + + Runs mutate_row, mutate_rows, and check_and_mutate_row internally, combining + to use as few network requests as required + + Flushes: + - manually + - every flush_interval seconds + - after queue reaches flush_count in quantity + - after queue reaches flush_size_bytes in storage size + - when batcher is closed or destroyed + + async with table.mutations_batcher() as batcher: + for i in range(10): + batcher.add(row, mut) + """ + + queue: asyncio.Queue[tuple[row_key, list[Mutation]]] + conditional_queues: dict[RowFilter, tuple[list[Mutation], list[Mutation]]] + + MB_SIZE = 1024 * 1024 + + def __init__( + self, + table: Table, + flush_count: int = 100, + flush_size_bytes: int = 100 * MB_SIZE, + max_mutation_bytes: int = 20 * MB_SIZE, + flush_interval: int = 5, + metadata: list[tuple[str, str]] | None = None, + ): + pass + + async def append(self, row_key: str | bytes, mutation: Mutation | list[Mutation]): + """ + Add a new mutation to the internal queue + """ + pass + + async def append_conditional( + self, + predicate_filter: RowFilter, + row_key: str | bytes, + if_true_mutations: Mutation | list[Mutation] | None = None, + if_false_mutations: Mutation | list[Mutation] | None = None, + ): + """ + Apply a different set of mutations based on the outcome of predicate_filter + + Calls check_and_mutate_row internally on flush + """ + pass + + async def flush(self): + """ + Send queue over network in as few calls as possible + + Raises: + - MutationsExceptionGroup if any mutation in the batch fails + """ + pass + + async def __aenter__(self): + """For context manager API""" + pass + + async def __aexit__(self, exc_type, exc, tb): + """For context manager API""" + pass + + async def close(self): + """ + Flush queue and clean up resources + """ + pass diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py new file mode 100644 index 000000000..ae149f5bd --- /dev/null +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -0,0 +1,35 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from dataclasses import dataclass + +from google.cloud.bigtable.row_response import family_id, qualifier + +class ReadModifyWriteRule(): + pass + +@dataclass +class IncrementRule(ReadModifyWriteRule): + increment_amount: int + family: family_id + qualifier: qualifier + + +@dataclass +class AppendValueRule(ReadModifyWriteRule): + append_value: bytes | str + family: family_id + qualifier: qualifier diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py new file mode 100644 index 000000000..1f17a937e --- /dev/null +++ b/google/cloud/bigtable/read_rows_query.py @@ -0,0 +1,52 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations +from google.cloud.bigtable.row_filters import RowFilter +from google.cloud.bigtable import RowKeySamples + +class ReadRowsQuery: + """ + Class to encapsulate details of a read row request + """ + + def __init__( + self, row_keys: list[str | bytes] | str | bytes | None = None, limit=None + ): + pass + + def set_limit(self, limit: int) -> ReadRowsQuery: + raise NotImplementedError + + def set_filter(self, filter: RowFilter) -> ReadRowsQuery: + raise NotImplementedError + + def add_rows(self, row_id_list: list[str]) -> ReadRowsQuery: + raise NotImplementedError + + def add_range( + self, start_key: str | bytes | None = None, end_key: str | bytes | None = None + ) -> ReadRowsQuery: + raise NotImplementedError + + def shard(self, shard_keys: RowKeySamples | None = None) -> list[ReadRowsQuery]: + """ + Split this query into multiple queries that can be evenly distributed + across nodes and be run in parallel + + Returns: + - a list of queries that represent a sharded version of the original + query (if possible) + """ + raise NotImplementedError diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/row_filters.py new file mode 100644 index 000000000..53192acc8 --- /dev/null +++ b/google/cloud/bigtable/row_filters.py @@ -0,0 +1,838 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Filters for Google Cloud Bigtable Row classes.""" + +import struct + + +from google.cloud._helpers import _microseconds_from_datetime # type: ignore +from google.cloud._helpers import _to_bytes # type: ignore +from google.cloud.bigtable_v2.types import data as data_v2_pb2 + +_PACK_I64 = struct.Struct(">q").pack + + +class RowFilter(object): + """Basic filter to apply to cells in a row. 
+ + These values can be combined via :class:`RowFilterChain`, + :class:`RowFilterUnion` and :class:`ConditionalRowFilter`. + + .. note:: + + This class is a do-nothing base class for all row filters. + """ + + +class _BoolFilter(RowFilter): + """Row filter that uses a boolean flag. + + :type flag: bool + :param flag: An indicator if a setting is turned on or off. + """ + + def __init__(self, flag): + self.flag = flag + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.flag == self.flag + + def __ne__(self, other): + return not self == other + + +class SinkFilter(_BoolFilter): + """Advanced row filter to skip parent filters. + + :type flag: bool + :param flag: ADVANCED USE ONLY. Hook for introspection into the row filter. + Outputs all cells directly to the output of the read rather + than to any parent filter. Cannot be used within the + ``predicate_filter``, ``true_filter``, or ``false_filter`` + of a :class:`ConditionalRowFilter`. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(sink=self.flag) + + +class PassAllFilter(_BoolFilter): + """Row filter equivalent to not filtering at all. + + :type flag: bool + :param flag: Matches all cells, regardless of input. Functionally + equivalent to leaving ``filter`` unset, but included for + completeness. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(pass_all_filter=self.flag) + + +class BlockAllFilter(_BoolFilter): + """Row filter that doesn't match any cells. + + :type flag: bool + :param flag: Does not match any cells, regardless of input. Useful for + temporarily disabling just part of a filter. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(block_all_filter=self.flag) + + +class _RegexFilter(RowFilter): + """Row filter that uses a regular expression. + + The ``regex`` must be valid RE2 patterns. See Google's + `RE2 reference`_ for the accepted syntax. + + .. _RE2 reference: https://github.com/google/re2/wiki/Syntax + + :type regex: bytes or str + :param regex: + A regular expression (RE2) for some row filter. String values + will be encoded as ASCII. + """ + + def __init__(self, regex): + self.regex = _to_bytes(regex) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.regex == self.regex + + def __ne__(self, other): + return not self == other + + +class RowKeyRegexFilter(_RegexFilter): + """Row filter for a row key regular expression. + + The ``regex`` must be valid RE2 patterns. See Google's + `RE2 reference`_ for the accepted syntax. + + .. _RE2 reference: https://github.com/google/re2/wiki/Syntax + + .. note:: + + Special care need be used with the expression used. Since + each of these properties can contain arbitrary bytes, the ``\\C`` + escape sequence must be used if a true wildcard is desired. The ``.`` + character will not match the new line character ``\\n``, which may be + present in a binary value. + + :type regex: bytes + :param regex: A regular expression (RE2) to match cells from rows with row + keys that satisfy this regex. 
For a + ``CheckAndMutateRowRequest``, this filter is unnecessary + since the row key is already specified. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(row_key_regex_filter=self.regex) + + +class RowSampleFilter(RowFilter): + """Matches all cells from a row with probability p. + + :type sample: float + :param sample: The probability of matching a cell (must be in the + interval ``(0, 1)`` The end points are excluded). + """ + + def __init__(self, sample): + self.sample = sample + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.sample == self.sample + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(row_sample_filter=self.sample) + + +class FamilyNameRegexFilter(_RegexFilter): + """Row filter for a family name regular expression. + + The ``regex`` must be valid RE2 patterns. See Google's + `RE2 reference`_ for the accepted syntax. + + .. _RE2 reference: https://github.com/google/re2/wiki/Syntax + + :type regex: str + :param regex: A regular expression (RE2) to match cells from columns in a + given column family. For technical reasons, the regex must + not contain the ``':'`` character, even if it is not being + used as a literal. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(family_name_regex_filter=self.regex) + + +class ColumnQualifierRegexFilter(_RegexFilter): + """Row filter for a column qualifier regular expression. + + The ``regex`` must be valid RE2 patterns. See Google's + `RE2 reference`_ for the accepted syntax. + + .. _RE2 reference: https://github.com/google/re2/wiki/Syntax + + .. note:: + + Special care need be used with the expression used. Since + each of these properties can contain arbitrary bytes, the ``\\C`` + escape sequence must be used if a true wildcard is desired. The ``.`` + character will not match the new line character ``\\n``, which may be + present in a binary value. + + :type regex: bytes + :param regex: A regular expression (RE2) to match cells from column that + match this regex (irrespective of column family). + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(column_qualifier_regex_filter=self.regex) + + +class TimestampRange(object): + """Range of time with inclusive lower and exclusive upper bounds. + + :type start: :class:`datetime.datetime` + :param start: (Optional) The (inclusive) lower bound of the timestamp + range. If omitted, defaults to Unix epoch. + + :type end: :class:`datetime.datetime` + :param end: (Optional) The (exclusive) upper bound of the timestamp + range. If omitted, no upper bound is used. 
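    A small construction sketch (illustrative only; the one-hour window is an
    arbitrary choice):

        import datetime

        now = datetime.datetime.now(datetime.timezone.utc)
        last_hour = TimestampRange(start=now - datetime.timedelta(hours=1))
        recent_cells_filter = TimestampRangeFilter(last_hour)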
+ """ + + def __init__(self, start=None, end=None): + self.start = start + self.end = end + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.start == self.start and other.end == self.end + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the :class:`TimestampRange` to a protobuf. + + :rtype: :class:`.data_v2_pb2.TimestampRange` + :returns: The converted current object. + """ + timestamp_range_kwargs = {} + if self.start is not None: + timestamp_range_kwargs["start_timestamp_micros"] = ( + _microseconds_from_datetime(self.start) // 1000 * 1000 + ) + if self.end is not None: + end_time = _microseconds_from_datetime(self.end) + if end_time % 1000 != 0: + end_time = end_time // 1000 * 1000 + 1000 + timestamp_range_kwargs["end_timestamp_micros"] = end_time + return data_v2_pb2.TimestampRange(**timestamp_range_kwargs) + + +class TimestampRangeFilter(RowFilter): + """Row filter that limits cells to a range of time. + + :type range_: :class:`TimestampRange` + :param range_: Range of time that cells should match against. + """ + + def __init__(self, range_): + self.range_ = range_ + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.range_ == self.range_ + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + First converts the ``range_`` on the current object to a protobuf and + then uses it in the ``timestamp_range_filter`` field. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(timestamp_range_filter=self.range_.to_pb()) + + +class ColumnRangeFilter(RowFilter): + """A row filter to restrict to a range of columns. + + Both the start and end column can be included or excluded in the range. + By default, we include them both, but this can be changed with optional + flags. + + :type column_family_id: str + :param column_family_id: The column family that contains the columns. Must + be of the form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. + + :type start_column: bytes + :param start_column: The start of the range of columns. If no value is + used, the backend applies no upper bound to the + values. + + :type end_column: bytes + :param end_column: The end of the range of columns. If no value is used, + the backend applies no upper bound to the values. + + :type inclusive_start: bool + :param inclusive_start: Boolean indicating if the start column should be + included in the range (or excluded). Defaults + to :data:`True` if ``start_column`` is passed and + no ``inclusive_start`` was given. + + :type inclusive_end: bool + :param inclusive_end: Boolean indicating if the end column should be + included in the range (or excluded). Defaults + to :data:`True` if ``end_column`` is passed and + no ``inclusive_end`` was given. + + :raises: :class:`ValueError ` if ``inclusive_start`` + is set but no ``start_column`` is given or if ``inclusive_end`` + is set but no ``end_column`` is given + """ + + def __init__( + self, + column_family_id, + start_column=None, + end_column=None, + inclusive_start=None, + inclusive_end=None, + ): + self.column_family_id = column_family_id + + if inclusive_start is None: + inclusive_start = True + elif start_column is None: + raise ValueError( + "Inclusive start was specified but no " "start column was given." 
+ ) + self.start_column = start_column + self.inclusive_start = inclusive_start + + if inclusive_end is None: + inclusive_end = True + elif end_column is None: + raise ValueError( + "Inclusive end was specified but no " "end column was given." + ) + self.end_column = end_column + self.inclusive_end = inclusive_end + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return ( + other.column_family_id == self.column_family_id + and other.start_column == self.start_column + and other.end_column == self.end_column + and other.inclusive_start == self.inclusive_start + and other.inclusive_end == self.inclusive_end + ) + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + First converts to a :class:`.data_v2_pb2.ColumnRange` and then uses it + in the ``column_range_filter`` field. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + column_range_kwargs = {"family_name": self.column_family_id} + if self.start_column is not None: + if self.inclusive_start: + key = "start_qualifier_closed" + else: + key = "start_qualifier_open" + column_range_kwargs[key] = _to_bytes(self.start_column) + if self.end_column is not None: + if self.inclusive_end: + key = "end_qualifier_closed" + else: + key = "end_qualifier_open" + column_range_kwargs[key] = _to_bytes(self.end_column) + + column_range = data_v2_pb2.ColumnRange(**column_range_kwargs) + return data_v2_pb2.RowFilter(column_range_filter=column_range) + + +class ValueRegexFilter(_RegexFilter): + """Row filter for a value regular expression. + + The ``regex`` must be valid RE2 patterns. See Google's + `RE2 reference`_ for the accepted syntax. + + .. _RE2 reference: https://github.com/google/re2/wiki/Syntax + + .. note:: + + Special care need be used with the expression used. Since + each of these properties can contain arbitrary bytes, the ``\\C`` + escape sequence must be used if a true wildcard is desired. The ``.`` + character will not match the new line character ``\\n``, which may be + present in a binary value. + + :type regex: bytes or str + :param regex: A regular expression (RE2) to match cells with values that + match this regex. String values will be encoded as ASCII. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(value_regex_filter=self.regex) + + +class ExactValueFilter(ValueRegexFilter): + """Row filter for an exact value. + + + :type value: bytes or str or int + :param value: + a literal string encodable as ASCII, or the + equivalent bytes, or an integer (which will be packed into 8-bytes). + """ + + def __init__(self, value): + if isinstance(value, int): + value = _PACK_I64(value) + super(ExactValueFilter, self).__init__(value) + + +class ValueRangeFilter(RowFilter): + """A range of values to restrict to in a row filter. + + Will only match cells that have values in this range. + + Both the start and end value can be included or excluded in the range. + By default, we include them both, but this can be changed with optional + flags. + + :type start_value: bytes + :param start_value: The start of the range of values. If no value is used, + the backend applies no lower bound to the values. + + :type end_value: bytes + :param end_value: The end of the range of values. If no value is used, + the backend applies no upper bound to the values. 
+ + :type inclusive_start: bool + :param inclusive_start: Boolean indicating if the start value should be + included in the range (or excluded). Defaults + to :data:`True` if ``start_value`` is passed and + no ``inclusive_start`` was given. + + :type inclusive_end: bool + :param inclusive_end: Boolean indicating if the end value should be + included in the range (or excluded). Defaults + to :data:`True` if ``end_value`` is passed and + no ``inclusive_end`` was given. + + :raises: :class:`ValueError ` if ``inclusive_start`` + is set but no ``start_value`` is given or if ``inclusive_end`` + is set but no ``end_value`` is given + """ + + def __init__( + self, start_value=None, end_value=None, inclusive_start=None, inclusive_end=None + ): + if inclusive_start is None: + inclusive_start = True + elif start_value is None: + raise ValueError( + "Inclusive start was specified but no " "start value was given." + ) + if isinstance(start_value, int): + start_value = _PACK_I64(start_value) + self.start_value = start_value + self.inclusive_start = inclusive_start + + if inclusive_end is None: + inclusive_end = True + elif end_value is None: + raise ValueError( + "Inclusive end was specified but no " "end value was given." + ) + if isinstance(end_value, int): + end_value = _PACK_I64(end_value) + self.end_value = end_value + self.inclusive_end = inclusive_end + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return ( + other.start_value == self.start_value + and other.end_value == self.end_value + and other.inclusive_start == self.inclusive_start + and other.inclusive_end == self.inclusive_end + ) + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + First converts to a :class:`.data_v2_pb2.ValueRange` and then uses + it to create a row filter protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + value_range_kwargs = {} + if self.start_value is not None: + if self.inclusive_start: + key = "start_value_closed" + else: + key = "start_value_open" + value_range_kwargs[key] = _to_bytes(self.start_value) + if self.end_value is not None: + if self.inclusive_end: + key = "end_value_closed" + else: + key = "end_value_open" + value_range_kwargs[key] = _to_bytes(self.end_value) + + value_range = data_v2_pb2.ValueRange(**value_range_kwargs) + return data_v2_pb2.RowFilter(value_range_filter=value_range) + + +class _CellCountFilter(RowFilter): + """Row filter that uses an integer count of cells. + + The cell count is used as an offset or a limit for the number + of results returned. + + :type num_cells: int + :param num_cells: An integer count / offset / limit. + """ + + def __init__(self, num_cells): + self.num_cells = num_cells + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.num_cells == self.num_cells + + def __ne__(self, other): + return not self == other + + +class CellsRowOffsetFilter(_CellCountFilter): + """Row filter to skip cells in a row. + + :type num_cells: int + :param num_cells: Skips the first N cells of the row. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(cells_per_row_offset_filter=self.num_cells) + + +class CellsRowLimitFilter(_CellCountFilter): + """Row filter to limit cells in a row. 
+ + :type num_cells: int + :param num_cells: Matches only the first N cells of the row. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(cells_per_row_limit_filter=self.num_cells) + + +class CellsColumnLimitFilter(_CellCountFilter): + """Row filter to limit cells in a column. + + :type num_cells: int + :param num_cells: Matches only the most recent N cells within each column. + This filters a (family name, column) pair, based on + timestamps of each cell. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(cells_per_column_limit_filter=self.num_cells) + + +class StripValueTransformerFilter(_BoolFilter): + """Row filter that transforms cells into empty string (0 bytes). + + :type flag: bool + :param flag: If :data:`True`, replaces each cell's value with the empty + string. As the name indicates, this is more useful as a + transformer than a generic query / filter. + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(strip_value_transformer=self.flag) + + +class ApplyLabelFilter(RowFilter): + """Filter to apply labels to cells. + + Intended to be used as an intermediate filter on a pre-existing filtered + result set. This way if two sets are combined, the label can tell where + the cell(s) originated.This allows the client to determine which results + were produced from which part of the filter. + + .. note:: + + Due to a technical limitation of the backend, it is not currently + possible to apply multiple labels to a cell. + + :type label: str + :param label: Label to apply to cells in the output row. Values must be + at most 15 characters long, and match the pattern + ``[a-z0-9\\-]+``. + """ + + def __init__(self, label): + self.label = label + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.label == self.label + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + return data_v2_pb2.RowFilter(apply_label_transformer=self.label) + + +class _FilterCombination(RowFilter): + """Chain of row filters. + + Sends rows through several filters in sequence. The filters are "chained" + together to process a row. After the first filter is applied, the second + is applied to the filtered output and so on for subsequent filters. + + :type filters: list + :param filters: List of :class:`RowFilter` + """ + + def __init__(self, filters=None): + if filters is None: + filters = [] + self.filters = filters + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other.filters == self.filters + + def __ne__(self, other): + return not self == other + + +class RowFilterChain(_FilterCombination): + """Chain of row filters. + + Sends rows through several filters in sequence. The filters are "chained" + together to process a row. After the first filter is applied, the second + is applied to the filtered output and so on for subsequent filters. 
+ + :type filters: list + :param filters: List of :class:`RowFilter` + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + chain = data_v2_pb2.RowFilter.Chain( + filters=[row_filter.to_pb() for row_filter in self.filters] + ) + return data_v2_pb2.RowFilter(chain=chain) + + +class RowFilterUnion(_FilterCombination): + """Union of row filters. + + Sends rows through several filters simultaneously, then + merges / interleaves all the filtered results together. + + If multiple cells are produced with the same column and timestamp, + they will all appear in the output row in an unspecified mutual order. + + :type filters: list + :param filters: List of :class:`RowFilter` + """ + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. + """ + interleave = data_v2_pb2.RowFilter.Interleave( + filters=[row_filter.to_pb() for row_filter in self.filters] + ) + return data_v2_pb2.RowFilter(interleave=interleave) + + +class ConditionalRowFilter(RowFilter): + """Conditional row filter which exhibits ternary behavior. + + Executes one of two filters based on another filter. If the ``base_filter`` + returns any cells in the row, then ``true_filter`` is executed. If not, + then ``false_filter`` is executed. + + .. note:: + + The ``base_filter`` does not execute atomically with the true and false + filters, which may lead to inconsistent or unexpected results. + + Additionally, executing a :class:`ConditionalRowFilter` has poor + performance on the server, especially when ``false_filter`` is set. + + :type base_filter: :class:`RowFilter` + :param base_filter: The filter to condition on before executing the + true/false filters. + + :type true_filter: :class:`RowFilter` + :param true_filter: (Optional) The filter to execute if there are any cells + matching ``base_filter``. If not provided, no results + will be returned in the true case. + + :type false_filter: :class:`RowFilter` + :param false_filter: (Optional) The filter to execute if there are no cells + matching ``base_filter``. If not provided, no results + will be returned in the false case. + """ + + def __init__(self, base_filter, true_filter=None, false_filter=None): + self.base_filter = base_filter + self.true_filter = true_filter + self.false_filter = false_filter + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return ( + other.base_filter == self.base_filter + and other.true_filter == self.true_filter + and other.false_filter == self.false_filter + ) + + def __ne__(self, other): + return not self == other + + def to_pb(self): + """Converts the row filter to a protobuf. + + :rtype: :class:`.data_v2_pb2.RowFilter` + :returns: The converted current object. 
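    A composition sketch using filters defined earlier in this module
    (illustrative only; the family name and label are placeholders):

        predicate = FamilyNameRegexFilter("stats")
        newest_labeled = RowFilterChain(
            filters=[CellsColumnLimitFilter(1), ApplyLabelFilter("recent")]
        )
        conditional = ConditionalRowFilter(base_filter=predicate, true_filter=newest_labeled)
        filter_pb = conditional.to_pb()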
+        """
+        condition_kwargs = {"predicate_filter": self.base_filter.to_pb()}
+        if self.true_filter is not None:
+            condition_kwargs["true_filter"] = self.true_filter.to_pb()
+        if self.false_filter is not None:
+            condition_kwargs["false_filter"] = self.false_filter.to_pb()
+        condition = data_v2_pb2.RowFilter.Condition(**condition_kwargs)
+        return data_v2_pb2.RowFilter(condition=condition)
diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py
new file mode 100644
index 000000000..be57f972a
--- /dev/null
+++ b/google/cloud/bigtable/row_response.py
@@ -0,0 +1,130 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+from collections import OrderedDict
+from collections.abc import Sequence
+from typing_extensions import TypeAlias
+
+# Type aliases used internally for readability.
+row_key: TypeAlias = bytes
+family_id: TypeAlias = str
+qualifier: TypeAlias = bytes
+row_value: TypeAlias = bytes
+
+
+class RowResponse(Sequence):
+    """
+    Model class for row data returned from server
+
+    Does not represent all data contained in the row, only data returned by a
+    query.
+    Expected to be read-only to users, and written by backend
+
+    Can be indexed:
+        cells = row["family", "qualifier"]
+    """
+
+
+    def __init__(self, key:row_key, cells: list[CellResponse]):
+        self.row_key = key
+        self.cells: OrderedDict[family_id, OrderedDict[qualifier, list[CellResponse]]] = OrderedDict()
+        """Expected to be used internally only"""
+        pass
+
+    def get_cells(
+        self, family: str | None, qualifier: str | bytes | None
+    ) -> list[CellResponse]:
+        """
+        Returns cells sorted in Bigtable native order:
+            - Family lexicographically ascending
+            - Qualifier lexicographically ascending
+            - Timestamp in reverse chronological order
+
+        If family or qualifier not passed, will include all
+
+        Syntactic sugar: cells = row["family", "qualifier"]
+        """
+        raise NotImplementedError
+
+    def get_index(self) -> dict[family_id, list[qualifier]]:
+        """
+        Returns a list of family and qualifiers for the object
+        """
+        raise NotImplementedError
+
+    def __str__(self):
+        """
+        Human-readable string representation
+
+        (family, qualifier)   cells
+        (ABC, XYZ)            [b"123", b"456" ...(+5)]
+        (DEF, XYZ)            [b"123"]
+        (GHI, XYZ)            [b"123", b"456" ...(+2)]
+        """
+        raise NotImplementedError
+
+
+class CellResponse:
+    """
+    Model class for cell data
+
+    Does not represent all data contained in the cell, only data returned by a
+    query.
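
The value helpers on CellResponse below (decode_value and __int__) are the only concrete pieces of this model so far; the rest of the stubs raise NotImplementedError. A rough usage sketch with made-up keys and values, assuming the google.cloud.bigtable.row_response module added in this patch:

    from google.cloud.bigtable.row_response import CellResponse

    text_cell = CellResponse(
        value=b"hello",
        row=b"user#123",
        family="profile",
        column_qualifier=b"greeting",
    )
    count_cell = CellResponse(
        value=(42).to_bytes(8, byteorder="big", signed=True),
        row=b"user#123",
        family="stats",
        column_qualifier=b"visits",
    )

    print(text_cell.decode_value("UTF-8", "strict"))  # -> "hello"
    print(int(count_cell))  # -> 42, decoded as a 64-bit big-endian signed integer
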
+ Expected to be read-only to users, and written by backend + """ + + def __init__( + self, + value: row_value, + row: row_key, + family: family_id, + column_qualifier: qualifier, + labels: list[str] | None = None, + timestamp: int | None = None, + ): + self.value = value + self.row_key = row + self.family = family + self.column_qualifier = column_qualifier + self.labels = labels + self.timestamp = timestamp + + def decode_value(self, encoding="UTF-8", errors=None) -> str: + """decode bytes to string""" + return self.value.decode(encoding, errors) + + def __int__(self) -> int: + """ + Allows casting cell to int + Interprets value as a 64-bit big-endian signed integer, as expected by + ReadModifyWrite increment rule + """ + return int.from_bytes(self.value, byteorder="big", signed=True) + + def __str__(self) -> str: + """ + Allows casting cell to str + Prints encoded byte string, same as printing value directly. + """ + return str(self.value) + + """For Bigtable native ordering""" + + def __lt__(self, other) -> bool: + raise NotImplementedError + + def __eq__(self, other) -> bool: + raise NotImplementedError From 32e8e4533d6ae43ee371cbd3817755cf4dc6cbf7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 15:03:54 -0800 Subject: [PATCH 009/349] improved __init__ --- google/cloud/bigtable/__init__.py | 28 ++++++++++++++++++- google/cloud/bigtable/client.py | 2 +- google/cloud/bigtable/exceptions.py | 5 +++- .../cloud/bigtable/read_modify_write_rules.py | 4 ++- google/cloud/bigtable/read_rows_query.py | 1 + google/cloud/bigtable/row_response.py | 7 +++-- 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index ba5cc6898..05c20d098 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -18,6 +18,18 @@ from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.client import Table +from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.row_response import RowResponse +from google.cloud.bigtable.row_response import CellResponse + +from google.cloud.bigtable.mutations_batcher import MutationsBatcher +from google.cloud.bigtable.mutations import Mutation +from google.cloud.bigtable.mutations import BulkMutationsEntry +from google.cloud.bigtable.mutations import SetCell +from google.cloud.bigtable.mutations import DeleteRangeFromColumn +from google.cloud.bigtable.mutations import DeleteAllFromFamily +from google.cloud.bigtable.mutations import DeleteAllFromRow + from typing_extensions import TypeAlias from typing import List, Tuple @@ -26,4 +38,18 @@ # Type alias for the output of sample_keys RowKeySamples: TypeAlias = List[Tuple[bytes, int]] -__all__ = ("BigtableDataClient", "Table", "RowKeySamples") +__all__ = ( + "BigtableDataClient", + "Table", + "RowKeySamples", + "ReadRowsQuery", + "MutationsBatcher", + "Mutation", + "BulkMutationsEntry", + "SetCell", + "DeleteRangeFromColumn", + "DeleteAllFromFamily", + "DeleteAllFromRow", + "RowResponse", + "CellResponse", +) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 36ddeba6e..d6e69e61d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -29,6 +29,7 @@ import google.auth.credentials.Credentials import google.auth.credentials.ClientOptions + class BigtableDataClient(ClientWithProject): def __init__( self, @@ -406,4 +407,3 @@ async def read_modify_write_row( - GoogleAPIError exceptions from 
grpc call """ raise NotImplementedError - diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index d277178e8..bea1b1ee8 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -20,17 +20,20 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation """ + pass class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" + pass is_311_plus = sys.version_info >= (3, 11) -class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore + +class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore """ Represents one or more exceptions that occur during a bulk Bigtable operation diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py index ae149f5bd..910243fef 100644 --- a/google/cloud/bigtable/read_modify_write_rules.py +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -18,9 +18,11 @@ from google.cloud.bigtable.row_response import family_id, qualifier -class ReadModifyWriteRule(): + +class ReadModifyWriteRule: pass + @dataclass class IncrementRule(ReadModifyWriteRule): increment_amount: int diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 1f17a937e..1411b6f37 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -16,6 +16,7 @@ from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable import RowKeySamples + class ReadRowsQuery: """ Class to encapsulate details of a read row request diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index be57f972a..d98726e10 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -37,10 +37,11 @@ class RowResponse(Sequence): cells = row["family", "qualifier"] """ - - def __init__(self, key:row_key, cells: list[CellResponse]): + def __init__(self, key: row_key, cells: list[CellResponse]): self.row_key = key - self.cells: OrderedDict[family_id, OrderedDict[qualifier, list[CellResponse]]] = OrderedDict() + self.cells: OrderedDict[ + family_id, OrderedDict[qualifier, list[CellResponse]] + ] = OrderedDict() """Expected to be used internally only""" pass From 65cb219ee2e65171788bb81433fdac8b7be1b059 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 15:20:25 -0800 Subject: [PATCH 010/349] stricter type checks --- google/cloud/bigtable/client.py | 8 ++++---- google/cloud/bigtable/row_response.py | 2 +- noxfile.py | 10 ++++++++-- owlbot.py | 10 ++++++++-- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d6e69e61d..5b3f5c25a 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -74,7 +74,7 @@ def __init__( async def read_rows_stream( self, - query: ReadRowsQuery | dict, + query: ReadRowsQuery | dict[str,Any], *, shard: bool = False, limit: int | None, @@ -133,7 +133,7 @@ async def read_rows_stream( async def read_rows( self, - query: ReadRowsQuery | dict, + query: ReadRowsQuery | dict[str, Any], *, shard: bool = False, limit: int | None, @@ -172,7 +172,7 @@ async def read_row( async def read_rows_sharded( self, - query_list: list[ReadRowsQuery] | list[dict], + query_list: list[ReadRowsQuery] 
| list[dict[str, Any]], *, limit: int | None, cache_size_limit: int | None = None, @@ -378,7 +378,7 @@ async def check_and_mutate_row( async def read_modify_write_row( self, row_key: str | bytes, - rules: ReadModifyWriteRule | list[ReadModifyWriteRule] | dict | list[dict], + rules: ReadModifyWriteRule | list[ReadModifyWriteRule] | dict[str,Any] | list[dict[str,Any]], *, operation_timeout: int | float | None = 60, metadata: list[tuple[str, str]] | None = None, diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index d98726e10..b20298c19 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -25,7 +25,7 @@ row_value: TypeAlias = bytes -class RowResponse(Sequence): +class RowResponse(Sequence[CellResponse]): """ Model class for row data returned from server diff --git a/noxfile.py b/noxfile.py index 47415385a..2bb1638b0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,8 +128,14 @@ def mypy(session): session.install("-e", ".") session.install("mypy", "types-setuptools", "types-protobuf", "types-mock") session.install("google-cloud-testutils") - # TODO: also verify types on tests, all of google package - session.run("mypy", "google/", "tests/") + session.run("mypy", "google/cloud/bigtable", "tests/", + "--check-untyped-defs", + "--warn-unreachable", + "--disallow-any-generics", + "--exclude", "google/cloud/bigtable/deprecated", + "--exclude", "tests/system/v2_client", + "--exclude", "tests/unit/v2_client", + ) @nox.session(python=DEFAULT_PYTHON_VERSION) diff --git a/owlbot.py b/owlbot.py index b6aa2f8a2..41d7da199 100644 --- a/owlbot.py +++ b/owlbot.py @@ -168,8 +168,14 @@ def mypy(session): session.install("-e", ".") session.install("mypy", "types-setuptools", "types-protobuf", "types-mock") session.install("google-cloud-testutils") - # TODO: also verify types on tests, all of google package - session.run("mypy", "google/", "tests/") + session.run("mypy", "google/cloud/bigtable", "tests/", + "--check-untyped-defs", + "--warn-unreachable", + "--disallow-any-generics", + "--exclude", "google/cloud/bigtable/deprecated", + "--exclude", "tests/system/v2_client", + "--exclude", "tests/unit/v2_client", + ) @nox.session(python=DEFAULT_PYTHON_VERSION) From c8b8a5aea518cb86f39ff9ec179ca001c06b34f3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 15:21:34 -0800 Subject: [PATCH 011/349] ran blacken --- google/cloud/bigtable/client.py | 7 +++++-- noxfile.py | 14 ++++++++++---- owlbot.py | 14 ++++++++++---- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5b3f5c25a..5b329b41c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -74,7 +74,7 @@ def __init__( async def read_rows_stream( self, - query: ReadRowsQuery | dict[str,Any], + query: ReadRowsQuery | dict[str, Any], *, shard: bool = False, limit: int | None, @@ -378,7 +378,10 @@ async def check_and_mutate_row( async def read_modify_write_row( self, row_key: str | bytes, - rules: ReadModifyWriteRule | list[ReadModifyWriteRule] | dict[str,Any] | list[dict[str,Any]], + rules: ReadModifyWriteRule + | list[ReadModifyWriteRule] + | dict[str, Any] + | list[dict[str, Any]], *, operation_timeout: int | float | None = 60, metadata: list[tuple[str, str]] | None = None, diff --git a/noxfile.py b/noxfile.py index 2bb1638b0..688262f6f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,13 +128,19 @@ def mypy(session): session.install("-e", ".") 
session.install("mypy", "types-setuptools", "types-protobuf", "types-mock") session.install("google-cloud-testutils") - session.run("mypy", "google/cloud/bigtable", "tests/", + session.run( + "mypy", + "google/cloud/bigtable", + "tests/", "--check-untyped-defs", "--warn-unreachable", "--disallow-any-generics", - "--exclude", "google/cloud/bigtable/deprecated", - "--exclude", "tests/system/v2_client", - "--exclude", "tests/unit/v2_client", + "--exclude", + "google/cloud/bigtable/deprecated", + "--exclude", + "tests/system/v2_client", + "--exclude", + "tests/unit/v2_client", ) diff --git a/owlbot.py b/owlbot.py index 41d7da199..92ceb17a5 100644 --- a/owlbot.py +++ b/owlbot.py @@ -168,13 +168,19 @@ def mypy(session): session.install("-e", ".") session.install("mypy", "types-setuptools", "types-protobuf", "types-mock") session.install("google-cloud-testutils") - session.run("mypy", "google/cloud/bigtable", "tests/", + session.run( + "mypy", + "google/cloud/bigtable", + "tests/", "--check-untyped-defs", "--warn-unreachable", "--disallow-any-generics", - "--exclude", "google/cloud/bigtable/deprecated", - "--exclude", "tests/system/v2_client", - "--exclude", "tests/unit/v2_client", + "--exclude", + "google/cloud/bigtable/deprecated", + "--exclude", + "tests/system/v2_client", + "--exclude", + "tests/unit/v2_client", ) From ff835ecf55f3eee8f4e0699d53fb7e01569f46a2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 15:39:19 -0800 Subject: [PATCH 012/349] removed sample implementation from BigtableExceptionGroup --- google/cloud/bigtable/exceptions.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index bea1b1ee8..7df5ad4cf 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -46,10 +46,5 @@ def __init__(self, message, excs): if is_311_plus: super().__init__(message, excs) else: + super().__init__(message) self.exceptions = excs - revised_message = f"{message} ({len(excs)} sub-exceptions)" - for i in range(len(excs)): - revised_message += f"\n+------------- {i} --------------\n" - revised_message += f"| {type(excs[i]).__name__}: {str(excs[i])}" - revised_message += f"\n+-------------------------------" - super().__init__(revised_message) From 3207ef50bf039f4743884d73353999241febb207 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 16:12:21 -0800 Subject: [PATCH 013/349] fixed circular import issues --- google/cloud/bigtable/__init__.py | 13 +++++++------ google/cloud/bigtable/client.py | 3 +-- google/cloud/bigtable/mutations_batcher.py | 6 ++++-- google/cloud/bigtable/row_response.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 05c20d098..370857dd0 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -13,6 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + +from typing_extensions import TypeAlias +from typing import List, Tuple + +# Type alias for the output of sample_keys +RowKeySamples: TypeAlias = List[Tuple[bytes, int]] + from google.cloud.bigtable import gapic_version as package_version from google.cloud.bigtable.client import BigtableDataClient @@ -30,14 +37,8 @@ from google.cloud.bigtable.mutations import DeleteAllFromFamily from google.cloud.bigtable.mutations import DeleteAllFromRow -from typing_extensions import TypeAlias -from typing import List, Tuple - __version__: str = package_version.__version__ -# Type alias for the output of sample_keys -RowKeySamples: TypeAlias = List[Tuple[bytes, int]] - __all__ = ( "BigtableDataClient", "Table", diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5b329b41c..a7767cba7 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -26,8 +26,7 @@ from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule -import google.auth.credentials.Credentials -import google.auth.credentials.ClientOptions +import google.auth.credentials class BigtableDataClient(ClientWithProject): diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 48bbf6a2e..315a08cb4 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -15,12 +15,14 @@ from __future__ import annotations import asyncio +from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import Mutation from google.cloud.bigtable.row_response import row_key from google.cloud.bigtable.row_filters import RowFilter -from google.cloud.bigtable import Table +if TYPE_CHECKING: + from google.cloud.bigtable.client import Table class MutationsBatcher: """ @@ -48,7 +50,7 @@ class MutationsBatcher: def __init__( self, - table: Table, + table: "Table", flush_count: int = 100, flush_size_bytes: int = 100 * MB_SIZE, max_mutation_bytes: int = 20 * MB_SIZE, diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index b20298c19..6d4248925 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -25,7 +25,7 @@ row_value: TypeAlias = bytes -class RowResponse(Sequence[CellResponse]): +class RowResponse(Sequence["CellResponse"]): """ Model class for row data returned from server From 3b3d720a190422761a5169a7043c4202f1f739e2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 16:12:38 -0800 Subject: [PATCH 014/349] added deprecation warning --- google/cloud/bigtable/deprecated/client.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/google/cloud/bigtable/deprecated/client.py b/google/cloud/bigtable/deprecated/client.py index 058055b60..6c46c9f19 100644 --- a/google/cloud/bigtable/deprecated/client.py +++ b/google/cloud/bigtable/deprecated/client.py @@ -91,6 +91,8 @@ def inner(self): class Client(ClientWithProject): """Client for interacting with Google Cloud Bigtable API. + DEPRECATED: This class is deprecated. Please use `google.cloud.bigtable.BigtableDataClient` instead. + .. note:: Since the Cloud Bigtable API requires the gRPC transport, no @@ -159,6 +161,11 @@ def __init__( admin_client_options=None, channel=None, ): + warnings.warn( + "'Client'. 
Please use 'google.cloud.bigtable.BigtableDataClient' instead.", + DeprecationWarning, + stacklevel=2, + ) if client_info is None: client_info = client_info_lib.ClientInfo( client_library_version=bigtable.__version__, From fa29ba192485312fd5d27d233a2d0609e731ebdf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 16:14:37 -0800 Subject: [PATCH 015/349] updated warning messages --- google/cloud/bigtable/deprecated/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/deprecated/client.py b/google/cloud/bigtable/deprecated/client.py index 6c46c9f19..c13e5f0da 100644 --- a/google/cloud/bigtable/deprecated/client.py +++ b/google/cloud/bigtable/deprecated/client.py @@ -91,7 +91,8 @@ def inner(self): class Client(ClientWithProject): """Client for interacting with Google Cloud Bigtable API. - DEPRECATED: This class is deprecated. Please use `google.cloud.bigtable.BigtableDataClient` instead. + DEPRECATED: This class is deprecated and may be removed in a future version + Please use `google.cloud.bigtable.BigtableDataClient` instead. .. note:: @@ -162,7 +163,7 @@ def __init__( channel=None, ): warnings.warn( - "'Client'. Please use 'google.cloud.bigtable.BigtableDataClient' instead.", + "'Client' is deprecated. Please use 'google.cloud.bigtable.BigtableDataClient' instead.", DeprecationWarning, stacklevel=2, ) From 4e792a58e42f1493ab9c49641ed4acaee79b9c07 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 3 Mar 2023 16:42:10 -0800 Subject: [PATCH 016/349] fixed lint issues --- google/cloud/bigtable/__init__.py | 6 ++-- google/cloud/bigtable/client.py | 19 +++++++----- google/cloud/bigtable/exceptions.py | 30 +++++++++---------- google/cloud/bigtable/mutations.py | 6 ++-- google/cloud/bigtable/mutations_batcher.py | 1 + .../cloud/bigtable/read_modify_write_rules.py | 4 +-- google/cloud/bigtable/read_rows_query.py | 11 ++++--- 7 files changed, 42 insertions(+), 35 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 370857dd0..91cec3ffe 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -17,9 +17,6 @@ from typing_extensions import TypeAlias from typing import List, Tuple -# Type alias for the output of sample_keys -RowKeySamples: TypeAlias = List[Tuple[bytes, int]] - from google.cloud.bigtable import gapic_version as package_version from google.cloud.bigtable.client import BigtableDataClient @@ -37,6 +34,9 @@ from google.cloud.bigtable.mutations import DeleteAllFromFamily from google.cloud.bigtable.mutations import DeleteAllFromRow +# Type alias for the output of sample_keys +RowKeySamples: TypeAlias = List[Tuple[bytes, int]] + __version__: str = package_version.__version__ __all__ = ( diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a7767cba7..93379cbb0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,19 +15,22 @@ from __future__ import annotations -from typing import Any, AsyncIterable +from typing import Any, AsyncIterable, TYPE_CHECKING from google.cloud.client import ClientWithProject -from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry -from google.cloud.bigtable.mutations_batcher import MutationsBatcher -from google.cloud.bigtable.row_response import RowResponse -from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable import RowKeySamples -from google.cloud.bigtable.row_filters import RowFilter -from 
google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule + import google.auth.credentials +if TYPE_CHECKING: + from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry + from google.cloud.bigtable.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.row_response import RowResponse + from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable import RowKeySamples + from google.cloud.bigtable.row_filters import RowFilter + from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule + class BigtableDataClient(ClientWithProject): def __init__( diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 7df5ad4cf..6974ab55d 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -16,24 +16,10 @@ import sys -class MutationsExceptionGroup(BigtableExceptionGroup): - """ - Represents one or more exceptions that occur during a bulk mutation operation - """ - - pass - - -class RetryExceptionGroup(BigtableExceptionGroup): - """Represents one or more exceptions that occur during a retryable operation""" - - pass - - is_311_plus = sys.version_info >= (3, 11) -class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore +class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation @@ -48,3 +34,17 @@ def __init__(self, message, excs): else: super().__init__(message) self.exceptions = excs + + +class MutationsExceptionGroup(BigtableExceptionGroup): + """ + Represents one or more exceptions that occur during a bulk mutation operation + """ + + pass + + +class RetryExceptionGroup(BigtableExceptionGroup): + """Represents one or more exceptions that occur during a retryable operation""" + + pass diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 8f63b776d..ed3c2f065 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -25,7 +25,7 @@ class Mutation: @dataclass class SetCell(Mutation): family: family_id - qualifier: qualifier + column_qualifier: qualifier new_value: bytes | str | int timestamp_ms: int | None = None @@ -33,7 +33,7 @@ class SetCell(Mutation): @dataclass class DeleteRangeFromColumn(Mutation): family: family_id - qualifier: qualifier + column_qualifier: qualifier start_timestamp_ms: int end_timestamp_ms: int @@ -50,5 +50,5 @@ class DeleteAllFromRow(Mutation): @dataclass class BulkMutationsEntry: - row_key: row_key + row: row_key mutations: list[Mutation] | Mutation diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 315a08cb4..9837124df 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from google.cloud.bigtable.client import Table + class MutationsBatcher: """ Allows users to send batches using context manager API: diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py index 910243fef..a9b0885f2 100644 --- a/google/cloud/bigtable/read_modify_write_rules.py +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -27,11 +27,11 @@ class ReadModifyWriteRule: class IncrementRule(ReadModifyWriteRule): increment_amount: int family: family_id - qualifier: qualifier + column_qualifier: qualifier @dataclass 
class AppendValueRule(ReadModifyWriteRule): append_value: bytes | str family: family_id - qualifier: qualifier + column_qualifier: qualifier diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 1411b6f37..64583b2d7 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -13,8 +13,11 @@ # limitations under the License. # from __future__ import annotations -from google.cloud.bigtable.row_filters import RowFilter -from google.cloud.bigtable import RowKeySamples +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from google.cloud.bigtable.row_filters import RowFilter + from google.cloud.bigtable import RowKeySamples class ReadRowsQuery: @@ -30,7 +33,7 @@ def __init__( def set_limit(self, limit: int) -> ReadRowsQuery: raise NotImplementedError - def set_filter(self, filter: RowFilter) -> ReadRowsQuery: + def set_filter(self, filter: "RowFilter") -> ReadRowsQuery: raise NotImplementedError def add_rows(self, row_id_list: list[str]) -> ReadRowsQuery: @@ -41,7 +44,7 @@ def add_range( ) -> ReadRowsQuery: raise NotImplementedError - def shard(self, shard_keys: RowKeySamples | None = None) -> list[ReadRowsQuery]: + def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: """ Split this query into multiple queries that can be evenly distributed across nodes and be run in parallel From 35e8a58cbef4f5f06cf77f207f6a48b2e6d6ce7e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 13:19:38 -0800 Subject: [PATCH 017/349] added submodule for gapic fork --- .gitmodules | 3 +++ gapic-generator-fork | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 gapic-generator-fork diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..e50970aa7 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "gapic-generator-fork"] + path = gapic-generator-fork + url = git@github.com:daniel-sanche/gapic-generator-python.git diff --git a/gapic-generator-fork b/gapic-generator-fork new file mode 160000 index 000000000..8e3637ed7 --- /dev/null +++ b/gapic-generator-fork @@ -0,0 +1 @@ +Subproject commit 8e3637ed74f99f09d1e5941531a8836459ef1df3 From dfab8011d80ba6924e14bee07e953b8b6a9749b8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 13:27:17 -0800 Subject: [PATCH 018/349] pulled in upstream proto updates --- google/cloud/bigtable_v2/__init__.py | 14 + .../services/bigtable/async_client.py | 469 +++++++++++++++++- .../bigtable_v2/services/bigtable/client.py | 449 +++++++++++++++++ .../services/bigtable/transports/base.py | 56 ++- .../services/bigtable/transports/grpc.py | 66 +++ .../bigtable/transports/grpc_asyncio.py | 66 +++ google/cloud/bigtable_v2/types/__init__.py | 14 + google/cloud/bigtable_v2/types/bigtable.py | 444 ++++++++++++++++- google/cloud/bigtable_v2/types/data.py | 61 +++ 9 files changed, 1605 insertions(+), 34 deletions(-) diff --git a/google/cloud/bigtable_v2/__init__.py b/google/cloud/bigtable_v2/__init__.py index b343c985d..342718dea 100644 --- a/google/cloud/bigtable_v2/__init__.py +++ b/google/cloud/bigtable_v2/__init__.py @@ -23,12 +23,16 @@ from .types.bigtable import CheckAndMutateRowRequest from .types.bigtable import CheckAndMutateRowResponse +from .types.bigtable import GenerateInitialChangeStreamPartitionsRequest +from .types.bigtable import GenerateInitialChangeStreamPartitionsResponse from .types.bigtable import MutateRowRequest from .types.bigtable import MutateRowResponse 
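
The circular-import fixes earlier in this series lean on a standard typing pattern: runtime imports that would create a cycle are moved under typing.TYPE_CHECKING, and the affected annotations become strings (or rely on from __future__ import annotations). A generic sketch of that pattern, with hypothetical module names rather than ones from this repository:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only evaluated by type checkers, so this import cannot create a
        # runtime cycle with modules that import this one.
        from my_package.table import Table


    class Batcher:
        def __init__(self, table: "Table"):
            # At runtime the annotation is just a string; the concrete Table
            # instance is supplied by the caller, so no import is needed here.
            self.table = table
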
from .types.bigtable import MutateRowsRequest from .types.bigtable import MutateRowsResponse from .types.bigtable import PingAndWarmRequest from .types.bigtable import PingAndWarmResponse +from .types.bigtable import ReadChangeStreamRequest +from .types.bigtable import ReadChangeStreamResponse from .types.bigtable import ReadModifyWriteRowRequest from .types.bigtable import ReadModifyWriteRowResponse from .types.bigtable import ReadRowsRequest @@ -45,6 +49,9 @@ from .types.data import RowFilter from .types.data import RowRange from .types.data import RowSet +from .types.data import StreamContinuationToken +from .types.data import StreamContinuationTokens +from .types.data import StreamPartition from .types.data import TimestampRange from .types.data import ValueRange from .types.request_stats import FullReadStatsView @@ -63,6 +70,8 @@ "ColumnRange", "Family", "FullReadStatsView", + "GenerateInitialChangeStreamPartitionsRequest", + "GenerateInitialChangeStreamPartitionsResponse", "MutateRowRequest", "MutateRowResponse", "MutateRowsRequest", @@ -70,6 +79,8 @@ "Mutation", "PingAndWarmRequest", "PingAndWarmResponse", + "ReadChangeStreamRequest", + "ReadChangeStreamResponse", "ReadIterationStats", "ReadModifyWriteRowRequest", "ReadModifyWriteRowResponse", @@ -85,6 +96,9 @@ "RowSet", "SampleRowKeysRequest", "SampleRowKeysResponse", + "StreamContinuationToken", + "StreamContinuationTokens", + "StreamPartition", "TimestampRange", "ValueRange", ) diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index 8a25fa3af..6735e6d03 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -229,6 +229,33 @@ def read_rows( each row will still be preserved. See the ReadRowsResponse documentation for details. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_read_rows(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.ReadRowsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = await client.read_rows(request=request) + + # Handle the response + async for response in stream: + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadRowsRequest, dict]]): The request object. Request message for @@ -283,7 +310,7 @@ def read_rows( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_rows, - default_timeout=43200.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -322,6 +349,33 @@ def sample_row_keys( to break up the data for distributed tasks like mapreduces. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_sample_row_keys(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.SampleRowKeysRequest( + table_name="table_name_value", + ) + + # Make the request + stream = await client.sample_row_keys(request=request) + + # Handle the response + async for response in stream: + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.SampleRowKeysRequest, dict]]): The request object. Request message for @@ -378,7 +432,7 @@ def sample_row_keys( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.sample_row_keys, - default_timeout=60.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -416,6 +470,33 @@ async def mutate_row( r"""Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_mutate_row(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.MutateRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + ) + + # Make the request + response = await client.mutate_row(request=request) + + # Handle the response + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.MutateRowRequest, dict]]): The request object. Request message for @@ -494,17 +575,7 @@ async def mutate_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.mutate_row, - default_retry=retries.Retry( - initial=0.01, - maximum=60.0, - multiplier=2, - predicate=retries.if_exception_type( - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - ), - deadline=60.0, - ), - default_timeout=60.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -542,6 +613,33 @@ def mutate_rows( is mutated atomically as in MutateRow, but the entire batch is not executed atomically. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_mutate_rows(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.MutateRowsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = await client.mutate_rows(request=request) + + # Handle the response + async for response in stream: + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.MutateRowsRequest, dict]]): The request object. Request message for @@ -614,7 +712,7 @@ def mutate_rows( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.mutate_rows, - default_timeout=600.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -654,6 +752,33 @@ async def check_and_mutate_row( r"""Mutates a row atomically based on the output of a predicate Reader filter. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_check_and_mutate_row(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.CheckAndMutateRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + ) + + # Make the request + response = await client.check_and_mutate_row(request=request) + + # Handle the response + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.CheckAndMutateRowRequest, dict]]): The request object. Request message for @@ -768,7 +893,7 @@ async def check_and_mutate_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.check_and_mutate_row, - default_timeout=20.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -805,6 +930,32 @@ async def ping_and_warm( connection. This call is not required but may be useful for connection keep-alive. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_ping_and_warm(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.PingAndWarmRequest( + name="name_value", + ) + + # Make the request + response = await client.ping_and_warm(request=request) + + # Handle the response + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]]): The request object. Request message for client @@ -903,6 +1054,37 @@ async def read_modify_write_row( or the current server time. The method returns the new contents of all modified cells. + .. 
code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_read_modify_write_row(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + rules = bigtable_v2.ReadModifyWriteRule() + rules.append_value = b'append_value_blob' + + request = bigtable_v2.ReadModifyWriteRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + rules=rules, + ) + + # Make the request + response = await client.read_modify_write_row(request=request) + + # Handle the response + print(response) + Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadModifyWriteRowRequest, dict]]): The request object. Request message for @@ -983,7 +1165,7 @@ async def read_modify_write_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_modify_write_row, - default_timeout=20.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -1006,6 +1188,261 @@ async def read_modify_write_row( # Done; return the response. return response + def generate_initial_change_stream_partitions( + self, + request: Optional[ + Union[bigtable.GenerateInitialChangeStreamPartitionsRequest, dict] + ] = None, + *, + table_name: Optional[str] = None, + app_profile_id: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Awaitable[ + AsyncIterable[bigtable.GenerateInitialChangeStreamPartitionsResponse] + ]: + r"""NOTE: This API is intended to be used by Apache Beam BigtableIO. + Returns the current list of partitions that make up the table's + change stream. The union of partitions will cover the entire + keyspace. Partitions can be read with ``ReadChangeStream``. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_generate_initial_change_stream_partitions(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.GenerateInitialChangeStreamPartitionsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = await client.generate_initial_change_stream_partitions(request=request) + + # Handle the response + async for response in stream: + print(response) + + Args: + request (Optional[Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]]): + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.GenerateInitialChangeStreamPartitions. + table_name (:class:`str`): + Required. The unique name of the table from which to get + change stream partitions. 
Values are of the form + ``projects//instances//tables/``. + Change streaming must be enabled on the table. + + This corresponds to the ``table_name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + app_profile_id (:class:`str`): + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. Single cluster routing must be + configured on the profile. + + This corresponds to the ``app_profile_id`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + AsyncIterable[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsResponse]: + NOTE: This API is intended to be used + by Apache Beam BigtableIO. Response + message for + Bigtable.GenerateInitialChangeStreamPartitions. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([table_name, app_profile_id]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = bigtable.GenerateInitialChangeStreamPartitionsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if table_name is not None: + request.table_name = table_name + if app_profile_id is not None: + request.app_profile_id = app_profile_id + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.generate_initial_change_stream_partitions, + default_timeout=None, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("table_name", request.table_name),) + ), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Done; return the response. + return response + + def read_change_stream( + self, + request: Optional[Union[bigtable.ReadChangeStreamRequest, dict]] = None, + *, + table_name: Optional[str] = None, + app_profile_id: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Awaitable[AsyncIterable[bigtable.ReadChangeStreamResponse]]: + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Reads changes from a table's change stream. + Changes will reflect both user-initiated mutations and + mutations that are caused by garbage collection. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + async def sample_read_change_stream(): + # Create a client + client = bigtable_v2.BigtableAsyncClient() + + # Initialize request argument(s) + request = bigtable_v2.ReadChangeStreamRequest( + table_name="table_name_value", + ) + + # Make the request + stream = await client.read_change_stream(request=request) + + # Handle the response + async for response in stream: + print(response) + + Args: + request (Optional[Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]]): + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.ReadChangeStream. + table_name (:class:`str`): + Required. The unique name of the table from which to + read a change stream. Values are of the form + ``projects//instances//tables/
``. + Change streaming must be enabled on the table. + + This corresponds to the ``table_name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + app_profile_id (:class:`str`): + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. Single cluster routing must be + configured on the profile. + + This corresponds to the ``app_profile_id`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + AsyncIterable[google.cloud.bigtable_v2.types.ReadChangeStreamResponse]: + NOTE: This API is intended to be used + by Apache Beam BigtableIO. Response + message for Bigtable.ReadChangeStream. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([table_name, app_profile_id]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = bigtable.ReadChangeStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if table_name is not None: + request.table_name = table_name + if app_profile_id is not None: + request.app_profile_id = app_profile_id + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.read_change_stream, + default_timeout=None, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("table_name", request.table_name),) + ), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Done; return the response. + return response + async def __aenter__(self): return self diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index aaff4669f..53a0aa8f7 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -476,6 +476,33 @@ def read_rows( each row will still be preserved. See the ReadRowsResponse documentation for details. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_read_rows(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.ReadRowsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = client.read_rows(request=request) + + # Handle the response + for response in stream: + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.ReadRowsRequest, dict]): The request object. Request message for @@ -578,6 +605,33 @@ def sample_row_keys( to break up the data for distributed tasks like mapreduces. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_sample_row_keys(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.SampleRowKeysRequest( + table_name="table_name_value", + ) + + # Make the request + stream = client.sample_row_keys(request=request) + + # Handle the response + for response in stream: + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.SampleRowKeysRequest, dict]): The request object. Request message for @@ -681,6 +735,33 @@ def mutate_row( r"""Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_mutate_row(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.MutateRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + ) + + # Make the request + response = client.mutate_row(request=request) + + # Handle the response + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.MutateRowRequest, dict]): The request object. Request message for @@ -806,6 +887,33 @@ def mutate_rows( is mutated atomically as in MutateRow, but the entire batch is not executed atomically. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_mutate_rows(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.MutateRowsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = client.mutate_rows(request=request) + + # Handle the response + for response in stream: + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.MutateRowsRequest, dict]): The request object. Request message for @@ -927,6 +1035,33 @@ def check_and_mutate_row( r"""Mutates a row atomically based on the output of a predicate Reader filter. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_check_and_mutate_row(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.CheckAndMutateRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + ) + + # Make the request + response = client.check_and_mutate_row(request=request) + + # Handle the response + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.CheckAndMutateRowRequest, dict]): The request object. Request message for @@ -1087,6 +1222,32 @@ def ping_and_warm( connection. This call is not required but may be useful for connection keep-alive. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_ping_and_warm(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.PingAndWarmRequest( + name="name_value", + ) + + # Make the request + response = client.ping_and_warm(request=request) + + # Handle the response + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]): The request object. Request message for client @@ -1194,6 +1355,37 @@ def read_modify_write_row( or the current server time. The method returns the new contents of all modified cells. + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_read_modify_write_row(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + rules = bigtable_v2.ReadModifyWriteRule() + rules.append_value = b'append_value_blob' + + request = bigtable_v2.ReadModifyWriteRowRequest( + table_name="table_name_value", + row_key=b'row_key_blob', + rules=rules, + ) + + # Make the request + response = client.read_modify_write_row(request=request) + + # Handle the response + print(response) + Args: request (Union[google.cloud.bigtable_v2.types.ReadModifyWriteRowRequest, dict]): The request object. Request message for @@ -1306,6 +1498,263 @@ def read_modify_write_row( # Done; return the response. return response + def generate_initial_change_stream_partitions( + self, + request: Optional[ + Union[bigtable.GenerateInitialChangeStreamPartitionsRequest, dict] + ] = None, + *, + table_name: Optional[str] = None, + app_profile_id: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Iterable[bigtable.GenerateInitialChangeStreamPartitionsResponse]: + r"""NOTE: This API is intended to be used by Apache Beam BigtableIO. + Returns the current list of partitions that make up the table's + change stream. The union of partitions will cover the entire + keyspace. Partitions can be read with ``ReadChangeStream``. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_generate_initial_change_stream_partitions(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.GenerateInitialChangeStreamPartitionsRequest( + table_name="table_name_value", + ) + + # Make the request + stream = client.generate_initial_change_stream_partitions(request=request) + + # Handle the response + for response in stream: + print(response) + + Args: + request (Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]): + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.GenerateInitialChangeStreamPartitions. + table_name (str): + Required. The unique name of the table from which to get + change stream partitions. Values are of the form + ``projects//instances//tables/
``. + Change streaming must be enabled on the table. + + This corresponds to the ``table_name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + app_profile_id (str): + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. Single cluster routing must be + configured on the profile. + + This corresponds to the ``app_profile_id`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + Iterable[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsResponse]: + NOTE: This API is intended to be used + by Apache Beam BigtableIO. Response + message for + Bigtable.GenerateInitialChangeStreamPartitions. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([table_name, app_profile_id]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a bigtable.GenerateInitialChangeStreamPartitionsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance( + request, bigtable.GenerateInitialChangeStreamPartitionsRequest + ): + request = bigtable.GenerateInitialChangeStreamPartitionsRequest(request) + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if table_name is not None: + request.table_name = table_name + if app_profile_id is not None: + request.app_profile_id = app_profile_id + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[ + self._transport.generate_initial_change_stream_partitions + ] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("table_name", request.table_name),) + ), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Done; return the response. + return response + + def read_change_stream( + self, + request: Optional[Union[bigtable.ReadChangeStreamRequest, dict]] = None, + *, + table_name: Optional[str] = None, + app_profile_id: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Iterable[bigtable.ReadChangeStreamResponse]: + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Reads changes from a table's change stream. + Changes will reflect both user-initiated mutations and + mutations that are caused by garbage collection. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. 
+ # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google import bigtable_v2 + + def sample_read_change_stream(): + # Create a client + client = bigtable_v2.BigtableClient() + + # Initialize request argument(s) + request = bigtable_v2.ReadChangeStreamRequest( + table_name="table_name_value", + ) + + # Make the request + stream = client.read_change_stream(request=request) + + # Handle the response + for response in stream: + print(response) + + Args: + request (Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]): + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.ReadChangeStream. + table_name (str): + Required. The unique name of the table from which to + read a change stream. Values are of the form + ``projects//instances//tables/
``. + Change streaming must be enabled on the table. + + This corresponds to the ``table_name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + app_profile_id (str): + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. Single cluster routing must be + configured on the profile. + + This corresponds to the ``app_profile_id`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + Iterable[google.cloud.bigtable_v2.types.ReadChangeStreamResponse]: + NOTE: This API is intended to be used + by Apache Beam BigtableIO. Response + message for Bigtable.ReadChangeStream. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([table_name, app_profile_id]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a bigtable.ReadChangeStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, bigtable.ReadChangeStreamRequest): + request = bigtable.ReadChangeStreamRequest(request) + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if table_name is not None: + request.table_name = table_name + if app_profile_id is not None: + request.app_profile_id = app_profile_id + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.read_change_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("table_name", request.table_name),) + ), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Done; return the response. 
+ return response + def __enter__(self) -> "BigtableClient": return self diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/base.py b/google/cloud/bigtable_v2/services/bigtable/transports/base.py index a32ea682b..5879a63cb 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/base.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/base.py @@ -132,37 +132,27 @@ def _prep_wrapped_messages(self, client_info): self._wrapped_methods = { self.read_rows: gapic_v1.method.wrap_method( self.read_rows, - default_timeout=43200.0, + default_timeout=None, client_info=client_info, ), self.sample_row_keys: gapic_v1.method.wrap_method( self.sample_row_keys, - default_timeout=60.0, + default_timeout=None, client_info=client_info, ), self.mutate_row: gapic_v1.method.wrap_method( self.mutate_row, - default_retry=retries.Retry( - initial=0.01, - maximum=60.0, - multiplier=2, - predicate=retries.if_exception_type( - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - ), - deadline=60.0, - ), - default_timeout=60.0, + default_timeout=None, client_info=client_info, ), self.mutate_rows: gapic_v1.method.wrap_method( self.mutate_rows, - default_timeout=600.0, + default_timeout=None, client_info=client_info, ), self.check_and_mutate_row: gapic_v1.method.wrap_method( self.check_and_mutate_row, - default_timeout=20.0, + default_timeout=None, client_info=client_info, ), self.ping_and_warm: gapic_v1.method.wrap_method( @@ -172,7 +162,17 @@ def _prep_wrapped_messages(self, client_info): ), self.read_modify_write_row: gapic_v1.method.wrap_method( self.read_modify_write_row, - default_timeout=20.0, + default_timeout=None, + client_info=client_info, + ), + self.generate_initial_change_stream_partitions: gapic_v1.method.wrap_method( + self.generate_initial_change_stream_partitions, + default_timeout=None, + client_info=client_info, + ), + self.read_change_stream: gapic_v1.method.wrap_method( + self.read_change_stream, + default_timeout=None, client_info=client_info, ), } @@ -257,6 +257,30 @@ def read_modify_write_row( ]: raise NotImplementedError() + @property + def generate_initial_change_stream_partitions( + self, + ) -> Callable[ + [bigtable.GenerateInitialChangeStreamPartitionsRequest], + Union[ + bigtable.GenerateInitialChangeStreamPartitionsResponse, + Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], + ], + ]: + raise NotImplementedError() + + @property + def read_change_stream( + self, + ) -> Callable[ + [bigtable.ReadChangeStreamRequest], + Union[ + bigtable.ReadChangeStreamResponse, + Awaitable[bigtable.ReadChangeStreamResponse], + ], + ]: + raise NotImplementedError() + @property def kind(self) -> str: raise NotImplementedError() diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/grpc.py b/google/cloud/bigtable_v2/services/bigtable/transports/grpc.py index cf6723678..b9e073e8a 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/grpc.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/grpc.py @@ -437,6 +437,72 @@ def read_modify_write_row( ) return self._stubs["read_modify_write_row"] + @property + def generate_initial_change_stream_partitions( + self, + ) -> Callable[ + [bigtable.GenerateInitialChangeStreamPartitionsRequest], + bigtable.GenerateInitialChangeStreamPartitionsResponse, + ]: + r"""Return a callable for the generate initial change stream + partitions method over gRPC. + + NOTE: This API is intended to be used by Apache Beam BigtableIO. 
+ Returns the current list of partitions that make up the table's + change stream. The union of partitions will cover the entire + keyspace. Partitions can be read with ``ReadChangeStream``. + + Returns: + Callable[[~.GenerateInitialChangeStreamPartitionsRequest], + ~.GenerateInitialChangeStreamPartitionsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "generate_initial_change_stream_partitions" not in self._stubs: + self._stubs[ + "generate_initial_change_stream_partitions" + ] = self.grpc_channel.unary_stream( + "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", + request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, + response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, + ) + return self._stubs["generate_initial_change_stream_partitions"] + + @property + def read_change_stream( + self, + ) -> Callable[ + [bigtable.ReadChangeStreamRequest], bigtable.ReadChangeStreamResponse + ]: + r"""Return a callable for the read change stream method over gRPC. + + NOTE: This API is intended to be used by Apache Beam + BigtableIO. Reads changes from a table's change stream. + Changes will reflect both user-initiated mutations and + mutations that are caused by garbage collection. + + Returns: + Callable[[~.ReadChangeStreamRequest], + ~.ReadChangeStreamResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_change_stream" not in self._stubs: + self._stubs["read_change_stream"] = self.grpc_channel.unary_stream( + "/google.bigtable.v2.Bigtable/ReadChangeStream", + request_serializer=bigtable.ReadChangeStreamRequest.serialize, + response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, + ) + return self._stubs["read_change_stream"] + def close(self): self.grpc_channel.close() diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/grpc_asyncio.py index 26d89c847..8bf02ce77 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/grpc_asyncio.py @@ -446,6 +446,72 @@ def read_modify_write_row( ) return self._stubs["read_modify_write_row"] + @property + def generate_initial_change_stream_partitions( + self, + ) -> Callable[ + [bigtable.GenerateInitialChangeStreamPartitionsRequest], + Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], + ]: + r"""Return a callable for the generate initial change stream + partitions method over gRPC. + + NOTE: This API is intended to be used by Apache Beam BigtableIO. + Returns the current list of partitions that make up the table's + change stream. The union of partitions will cover the entire + keyspace. Partitions can be read with ``ReadChangeStream``. + + Returns: + Callable[[~.GenerateInitialChangeStreamPartitionsRequest], + Awaitable[~.GenerateInitialChangeStreamPartitionsResponse]]: + A function that, when called, will call the underlying RPC + on the server. 
+ """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "generate_initial_change_stream_partitions" not in self._stubs: + self._stubs[ + "generate_initial_change_stream_partitions" + ] = self.grpc_channel.unary_stream( + "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", + request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, + response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, + ) + return self._stubs["generate_initial_change_stream_partitions"] + + @property + def read_change_stream( + self, + ) -> Callable[ + [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] + ]: + r"""Return a callable for the read change stream method over gRPC. + + NOTE: This API is intended to be used by Apache Beam + BigtableIO. Reads changes from a table's change stream. + Changes will reflect both user-initiated mutations and + mutations that are caused by garbage collection. + + Returns: + Callable[[~.ReadChangeStreamRequest], + Awaitable[~.ReadChangeStreamResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_change_stream" not in self._stubs: + self._stubs["read_change_stream"] = self.grpc_channel.unary_stream( + "/google.bigtable.v2.Bigtable/ReadChangeStream", + request_serializer=bigtable.ReadChangeStreamRequest.serialize, + response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, + ) + return self._stubs["read_change_stream"] + def close(self): return self.grpc_channel.close() diff --git a/google/cloud/bigtable_v2/types/__init__.py b/google/cloud/bigtable_v2/types/__init__.py index 3499cf5d1..bb2533e33 100644 --- a/google/cloud/bigtable_v2/types/__init__.py +++ b/google/cloud/bigtable_v2/types/__init__.py @@ -16,12 +16,16 @@ from .bigtable import ( CheckAndMutateRowRequest, CheckAndMutateRowResponse, + GenerateInitialChangeStreamPartitionsRequest, + GenerateInitialChangeStreamPartitionsResponse, MutateRowRequest, MutateRowResponse, MutateRowsRequest, MutateRowsResponse, PingAndWarmRequest, PingAndWarmResponse, + ReadChangeStreamRequest, + ReadChangeStreamResponse, ReadModifyWriteRowRequest, ReadModifyWriteRowResponse, ReadRowsRequest, @@ -40,6 +44,9 @@ RowFilter, RowRange, RowSet, + StreamContinuationToken, + StreamContinuationTokens, + StreamPartition, TimestampRange, ValueRange, ) @@ -56,12 +63,16 @@ __all__ = ( "CheckAndMutateRowRequest", "CheckAndMutateRowResponse", + "GenerateInitialChangeStreamPartitionsRequest", + "GenerateInitialChangeStreamPartitionsResponse", "MutateRowRequest", "MutateRowResponse", "MutateRowsRequest", "MutateRowsResponse", "PingAndWarmRequest", "PingAndWarmResponse", + "ReadChangeStreamRequest", + "ReadChangeStreamResponse", "ReadModifyWriteRowRequest", "ReadModifyWriteRowResponse", "ReadRowsRequest", @@ -78,6 +89,9 @@ "RowFilter", "RowRange", "RowSet", + "StreamContinuationToken", + "StreamContinuationTokens", + "StreamPartition", "TimestampRange", "ValueRange", "FullReadStatsView", diff --git a/google/cloud/bigtable_v2/types/bigtable.py b/google/cloud/bigtable_v2/types/bigtable.py index 8124cb7e3..378b7500d 100644 --- a/google/cloud/bigtable_v2/types/bigtable.py +++ 
b/google/cloud/bigtable_v2/types/bigtable.py @@ -19,6 +19,8 @@ from google.cloud.bigtable_v2.types import data from google.cloud.bigtable_v2.types import request_stats as gb_request_stats +from google.protobuf import duration_pb2 # type: ignore +from google.protobuf import timestamp_pb2 # type: ignore from google.protobuf import wrappers_pb2 # type: ignore from google.rpc import status_pb2 # type: ignore @@ -40,6 +42,10 @@ "PingAndWarmResponse", "ReadModifyWriteRowRequest", "ReadModifyWriteRowResponse", + "GenerateInitialChangeStreamPartitionsRequest", + "GenerateInitialChangeStreamPartitionsResponse", + "ReadChangeStreamRequest", + "ReadChangeStreamResponse", }, ) @@ -429,8 +435,8 @@ class Entry(proto.Message): Required. Changes to be atomically applied to the specified row. Mutations are applied in order, meaning that earlier mutations can be - masked by later ones. - You must specify at least one mutation. + masked by later ones. You must specify at least + one mutation. """ row_key: bytes = proto.Field( @@ -672,4 +678,438 @@ class ReadModifyWriteRowResponse(proto.Message): ) +class GenerateInitialChangeStreamPartitionsRequest(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Request message for + Bigtable.GenerateInitialChangeStreamPartitions. + + Attributes: + table_name (str): + Required. The unique name of the table from which to get + change stream partitions. Values are of the form + ``projects//instances//tables/
``. + Change streaming must be enabled on the table. + app_profile_id (str): + This value specifies routing for replication. + If not specified, the "default" application + profile will be used. Single cluster routing + must be configured on the profile. + """ + + table_name: str = proto.Field( + proto.STRING, + number=1, + ) + app_profile_id: str = proto.Field( + proto.STRING, + number=2, + ) + + +class GenerateInitialChangeStreamPartitionsResponse(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Response message for + Bigtable.GenerateInitialChangeStreamPartitions. + + Attributes: + partition (google.cloud.bigtable_v2.types.StreamPartition): + A partition of the change stream. + """ + + partition: data.StreamPartition = proto.Field( + proto.MESSAGE, + number=1, + message=data.StreamPartition, + ) + + +class ReadChangeStreamRequest(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Request message for Bigtable.ReadChangeStream. + + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + + Attributes: + table_name (str): + Required. The unique name of the table from which to read a + change stream. Values are of the form + ``projects//instances//tables/
``. + Change streaming must be enabled on the table. + app_profile_id (str): + This value specifies routing for replication. + If not specified, the "default" application + profile will be used. Single cluster routing + must be configured on the profile. + partition (google.cloud.bigtable_v2.types.StreamPartition): + The partition to read changes from. + start_time (google.protobuf.timestamp_pb2.Timestamp): + Start reading the stream at the specified + timestamp. This timestamp must be within the + change stream retention period, less than or + equal to the current time, and after change + stream creation, whichever is greater. This + value is inclusive and will be truncated to + microsecond granularity. + + This field is a member of `oneof`_ ``start_from``. + continuation_tokens (google.cloud.bigtable_v2.types.StreamContinuationTokens): + Tokens that describe how to resume reading a stream where + reading previously left off. If specified, changes will be + read starting at the the position. Tokens are delivered on + the stream as part of ``Heartbeat`` and ``CloseStream`` + messages. + + If a single token is provided, the token’s partition must + exactly match the request’s partition. If multiple tokens + are provided, as in the case of a partition merge, the union + of the token partitions must exactly cover the request’s + partition. Otherwise, INVALID_ARGUMENT will be returned. + + This field is a member of `oneof`_ ``start_from``. + end_time (google.protobuf.timestamp_pb2.Timestamp): + If specified, OK will be returned when the + stream advances beyond this time. Otherwise, + changes will be continuously delivered on the + stream. This value is inclusive and will be + truncated to microsecond granularity. + heartbeat_duration (google.protobuf.duration_pb2.Duration): + If specified, the duration between ``Heartbeat`` messages on + the stream. Otherwise, defaults to 5 seconds. + """ + + table_name: str = proto.Field( + proto.STRING, + number=1, + ) + app_profile_id: str = proto.Field( + proto.STRING, + number=2, + ) + partition: data.StreamPartition = proto.Field( + proto.MESSAGE, + number=3, + message=data.StreamPartition, + ) + start_time: timestamp_pb2.Timestamp = proto.Field( + proto.MESSAGE, + number=4, + oneof="start_from", + message=timestamp_pb2.Timestamp, + ) + continuation_tokens: data.StreamContinuationTokens = proto.Field( + proto.MESSAGE, + number=6, + oneof="start_from", + message=data.StreamContinuationTokens, + ) + end_time: timestamp_pb2.Timestamp = proto.Field( + proto.MESSAGE, + number=5, + message=timestamp_pb2.Timestamp, + ) + heartbeat_duration: duration_pb2.Duration = proto.Field( + proto.MESSAGE, + number=7, + message=duration_pb2.Duration, + ) + + +class ReadChangeStreamResponse(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. Response message for Bigtable.ReadChangeStream. + + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + + Attributes: + data_change (google.cloud.bigtable_v2.types.ReadChangeStreamResponse.DataChange): + A mutation to the partition. + + This field is a member of `oneof`_ ``stream_record``. + heartbeat (google.cloud.bigtable_v2.types.ReadChangeStreamResponse.Heartbeat): + A periodic heartbeat message. 
+ + This field is a member of `oneof`_ ``stream_record``. + close_stream (google.cloud.bigtable_v2.types.ReadChangeStreamResponse.CloseStream): + An indication that the stream should be + closed. + + This field is a member of `oneof`_ ``stream_record``. + """ + + class MutationChunk(proto.Message): + r"""A partial or complete mutation. + + Attributes: + chunk_info (google.cloud.bigtable_v2.types.ReadChangeStreamResponse.MutationChunk.ChunkInfo): + If set, then the mutation is a ``SetCell`` with a chunked + value across multiple messages. + mutation (google.cloud.bigtable_v2.types.Mutation): + If this is a continuation of a chunked message + (``chunked_value_offset`` > 0), ignore all fields except the + ``SetCell``'s value and merge it with the previous message + by concatenating the value fields. + """ + + class ChunkInfo(proto.Message): + r"""Information about the chunking of this mutation. Only ``SetCell`` + mutations can be chunked, and all chunks for a ``SetCell`` will be + delivered contiguously with no other mutation types interleaved. + + Attributes: + chunked_value_size (int): + The total value size of all the chunks that make up the + ``SetCell``. + chunked_value_offset (int): + The byte offset of this chunk into the total + value size of the mutation. + last_chunk (bool): + When true, this is the last chunk of a chunked ``SetCell``. + """ + + chunked_value_size: int = proto.Field( + proto.INT32, + number=1, + ) + chunked_value_offset: int = proto.Field( + proto.INT32, + number=2, + ) + last_chunk: bool = proto.Field( + proto.BOOL, + number=3, + ) + + chunk_info: "ReadChangeStreamResponse.MutationChunk.ChunkInfo" = proto.Field( + proto.MESSAGE, + number=1, + message="ReadChangeStreamResponse.MutationChunk.ChunkInfo", + ) + mutation: data.Mutation = proto.Field( + proto.MESSAGE, + number=2, + message=data.Mutation, + ) + + class DataChange(proto.Message): + r"""A message corresponding to one or more mutations to the partition + being streamed. A single logical ``DataChange`` message may also be + split across a sequence of multiple individual messages. Messages + other than the first in a sequence will only have the ``type`` and + ``chunks`` fields populated, with the final message in the sequence + also containing ``done`` set to true. + + Attributes: + type_ (google.cloud.bigtable_v2.types.ReadChangeStreamResponse.DataChange.Type): + The type of the mutation. + source_cluster_id (str): + The cluster where the mutation was applied. Not set when + ``type`` is ``GARBAGE_COLLECTION``. + row_key (bytes): + The row key for all mutations that are part of this + ``DataChange``. If the ``DataChange`` is chunked across + multiple messages, then this field will only be set for the + first message. + commit_timestamp (google.protobuf.timestamp_pb2.Timestamp): + The timestamp at which the mutation was + applied on the Bigtable server. + tiebreaker (int): + A value that lets stream consumers reconstruct Bigtable's + conflict resolution semantics. + https://cloud.google.com/bigtable/docs/writes#conflict-resolution + In the event that the same row key, column family, column + qualifier, timestamp are modified on different clusters at + the same ``commit_timestamp``, the mutation with the larger + ``tiebreaker`` will be the one chosen for the eventually + consistent state of the system. + chunks (MutableSequence[google.cloud.bigtable_v2.types.ReadChangeStreamResponse.MutationChunk]): + The mutations associated with this change to the partition. 
+ May contain complete mutations or chunks of a multi-message + chunked ``DataChange`` record. + done (bool): + When true, indicates that the entire ``DataChange`` has been + read and the client can safely process the message. + token (str): + An encoded position for this stream's + partition to restart reading from. This token is + for the StreamPartition from the request. + estimated_low_watermark (google.protobuf.timestamp_pb2.Timestamp): + An estimate of the commit timestamp that is + usually lower than or equal to any timestamp for + a record that will be delivered in the future on + the stream. It is possible that, under + particular circumstances that a future record + has a timestamp is is lower than a previously + seen timestamp. For an example usage see + https://beam.apache.org/documentation/basics/#watermarks + """ + + class Type(proto.Enum): + r"""The type of mutation. + + Values: + TYPE_UNSPECIFIED (0): + The type is unspecified. + USER (1): + A user-initiated mutation. + GARBAGE_COLLECTION (2): + A system-initiated mutation as part of + garbage collection. + https://cloud.google.com/bigtable/docs/garbage-collection + CONTINUATION (3): + This is a continuation of a multi-message + change. + """ + TYPE_UNSPECIFIED = 0 + USER = 1 + GARBAGE_COLLECTION = 2 + CONTINUATION = 3 + + type_: "ReadChangeStreamResponse.DataChange.Type" = proto.Field( + proto.ENUM, + number=1, + enum="ReadChangeStreamResponse.DataChange.Type", + ) + source_cluster_id: str = proto.Field( + proto.STRING, + number=2, + ) + row_key: bytes = proto.Field( + proto.BYTES, + number=3, + ) + commit_timestamp: timestamp_pb2.Timestamp = proto.Field( + proto.MESSAGE, + number=4, + message=timestamp_pb2.Timestamp, + ) + tiebreaker: int = proto.Field( + proto.INT32, + number=5, + ) + chunks: MutableSequence[ + "ReadChangeStreamResponse.MutationChunk" + ] = proto.RepeatedField( + proto.MESSAGE, + number=6, + message="ReadChangeStreamResponse.MutationChunk", + ) + done: bool = proto.Field( + proto.BOOL, + number=8, + ) + token: str = proto.Field( + proto.STRING, + number=9, + ) + estimated_low_watermark: timestamp_pb2.Timestamp = proto.Field( + proto.MESSAGE, + number=10, + message=timestamp_pb2.Timestamp, + ) + + class Heartbeat(proto.Message): + r"""A periodic message with information that can be used to + checkpoint the state of a stream. + + Attributes: + continuation_token (google.cloud.bigtable_v2.types.StreamContinuationToken): + A token that can be provided to a subsequent + ``ReadChangeStream`` call to pick up reading at the current + stream position. + estimated_low_watermark (google.protobuf.timestamp_pb2.Timestamp): + An estimate of the commit timestamp that is + usually lower than or equal to any timestamp for + a record that will be delivered in the future on + the stream. It is possible that, under + particular circumstances that a future record + has a timestamp is is lower than a previously + seen timestamp. For an example usage see + https://beam.apache.org/documentation/basics/#watermarks + """ + + continuation_token: data.StreamContinuationToken = proto.Field( + proto.MESSAGE, + number=1, + message=data.StreamContinuationToken, + ) + estimated_low_watermark: timestamp_pb2.Timestamp = proto.Field( + proto.MESSAGE, + number=2, + message=timestamp_pb2.Timestamp, + ) + + class CloseStream(proto.Message): + r"""A message indicating that the client should stop reading from the + stream. 
If status is OK and ``continuation_tokens`` & + ``new_partitions`` are empty, the stream has finished (for example + if there was an ``end_time`` specified). If ``continuation_tokens`` + & ``new_partitions`` are present, then a change in partitioning + requires the client to open a new stream for each token to resume + reading. Example: [B, D) ends \| v new_partitions: [A, C) [C, E) + continuation_tokens.partitions: [B,C) [C,D) ^---^ ^---^ ^ ^ \| \| \| + StreamContinuationToken 2 \| StreamContinuationToken 1 To read the + new partition [A,C), supply the continuation tokens whose ranges + cover the new partition, for example ContinuationToken[A,B) & + ContinuationToken[B,C). + + Attributes: + status (google.rpc.status_pb2.Status): + The status of the stream. + continuation_tokens (MutableSequence[google.cloud.bigtable_v2.types.StreamContinuationToken]): + If non-empty, contains the information needed + to resume reading their associated partitions. + new_partitions (MutableSequence[google.cloud.bigtable_v2.types.StreamPartition]): + If non-empty, contains the new partitions to start reading + from, which are related to but not necessarily identical to + the partitions for the above ``continuation_tokens``. + """ + + status: status_pb2.Status = proto.Field( + proto.MESSAGE, + number=1, + message=status_pb2.Status, + ) + continuation_tokens: MutableSequence[ + data.StreamContinuationToken + ] = proto.RepeatedField( + proto.MESSAGE, + number=2, + message=data.StreamContinuationToken, + ) + new_partitions: MutableSequence[data.StreamPartition] = proto.RepeatedField( + proto.MESSAGE, + number=3, + message=data.StreamPartition, + ) + + data_change: DataChange = proto.Field( + proto.MESSAGE, + number=1, + oneof="stream_record", + message=DataChange, + ) + heartbeat: Heartbeat = proto.Field( + proto.MESSAGE, + number=2, + oneof="stream_record", + message=Heartbeat, + ) + close_stream: CloseStream = proto.Field( + proto.MESSAGE, + number=3, + oneof="stream_record", + message=CloseStream, + ) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigtable_v2/types/data.py b/google/cloud/bigtable_v2/types/data.py index a3bec7274..8b74c8c70 100644 --- a/google/cloud/bigtable_v2/types/data.py +++ b/google/cloud/bigtable_v2/types/data.py @@ -33,6 +33,9 @@ "RowFilter", "Mutation", "ReadModifyWriteRule", + "StreamPartition", + "StreamContinuationTokens", + "StreamContinuationToken", }, ) @@ -1034,4 +1037,62 @@ class ReadModifyWriteRule(proto.Message): ) +class StreamPartition(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam + BigtableIO. A partition of a change stream. + + Attributes: + row_range (google.cloud.bigtable_v2.types.RowRange): + The row range covered by this partition and is specified by + [``start_key_closed``, ``end_key_open``). + """ + + row_range: "RowRange" = proto.Field( + proto.MESSAGE, + number=1, + message="RowRange", + ) + + +class StreamContinuationTokens(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam BigtableIO. The + information required to continue reading the data from multiple + ``StreamPartitions`` from where a previous read left off. + + Attributes: + tokens (MutableSequence[google.cloud.bigtable_v2.types.StreamContinuationToken]): + List of continuation tokens. 
+ """ + + tokens: MutableSequence["StreamContinuationToken"] = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="StreamContinuationToken", + ) + + +class StreamContinuationToken(proto.Message): + r"""NOTE: This API is intended to be used by Apache Beam BigtableIO. The + information required to continue reading the data from a + ``StreamPartition`` from where a previous read left off. + + Attributes: + partition (google.cloud.bigtable_v2.types.StreamPartition): + The partition that this token applies to. + token (str): + An encoded position in the stream to restart + reading from. + """ + + partition: "StreamPartition" = proto.Field( + proto.MESSAGE, + number=1, + message="StreamPartition", + ) + token: str = proto.Field( + proto.STRING, + number=2, + ) + + __all__ = tuple(sorted(__protobuf__.manifest)) From 72f7d0ef40dcd622e4c94c315ec8f69dc7072323 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 13:27:41 -0800 Subject: [PATCH 019/349] added pooled transport class --- .../bigtable_v2/services/bigtable/client.py | 2 + .../transports/pooled_grpc_asyncio.py | 538 ++++++++++++++++++ 2 files changed, 540 insertions(+) create mode 100644 google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index 53a0aa8f7..e5131eebf 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -53,6 +53,7 @@ from .transports.base import BigtableTransport, DEFAULT_CLIENT_INFO from .transports.grpc import BigtableGrpcTransport from .transports.grpc_asyncio import BigtableGrpcAsyncIOTransport +from .transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport class BigtableClientMeta(type): @@ -66,6 +67,7 @@ class BigtableClientMeta(type): _transport_registry = OrderedDict() # type: Dict[str, Type[BigtableTransport]] _transport_registry["grpc"] = BigtableGrpcTransport _transport_registry["grpc_asyncio"] = BigtableGrpcAsyncIOTransport + _transport_registry["pooled_grpc_asyncio"] = PooledBigtableGrpcAsyncIOTransport def get_transport_class( cls, diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py new file mode 100644 index 000000000..5b1b90dcd --- /dev/null +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -0,0 +1,538 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import warnings +from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple, Union, List + +from google.api_core import gapic_v1 +from google.api_core import grpc_helpers_async +from google.auth import credentials as ga_credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore +from grpc.experimental import aio # type: ignore + +from google.cloud.bigtable_v2.types import bigtable +from .base import BigtableTransport, DEFAULT_CLIENT_INFO +from .grpc import BigtableGrpcTransport + + +class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): + """Pooled gRPC AsyncIO backend transport for Bigtable. + + Service for reading from and writing to existing Bigtable + tables. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + + This class allows channel pooling, so multiple channels can be used concurrently + when making requests. Channels are rotated in a round-robin fashion. + """ + + _channel_pool: List[aio.Channel] = [] + _stubs: Dict[Tuple[aio.Channel, str], Callable] = {} + _next_idx: int = 0 + + @classmethod + def create_channel( + cls, + host: str = "bigtable.googleapis.com", + credentials: Optional[ga_credentials.Credentials] = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> aio.Channel: + """Create and return a gRPC AsyncIO channel object. + Args: + host (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + aio.Channel: A gRPC AsyncIO channel object. 
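+        Raises:
+            google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials``
+                and ``credentials_file`` are passed.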
+ """ + + return grpc_helpers_async.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + quota_project_id=quota_project_id, + default_scopes=cls.AUTH_SCOPES, + scopes=scopes, + default_host=cls.DEFAULT_HOST, + **kwargs, + ) + + def __init__( + self, + *, + pool_size: int = 3, + host: str = "bigtable.googleapis.com", + credentials: Optional[ga_credentials.Credentials] = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + api_mtls_endpoint: Optional[str] = None, + client_cert_source: Optional[Callable[[], Tuple[bytes, bytes]]] = None, + ssl_channel_credentials: Optional[grpc.ChannelCredentials] = None, + client_cert_source_for_mtls: Optional[Callable[[], Tuple[bytes, bytes]]] = None, + quota_project_id: Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + always_use_jwt_access: Optional[bool] = False, + api_audience: Optional[str] = None, + ) -> None: + """Instantiate the transport. + + Args: + pool_size (int): the number of grpc channels to maintain in a pool + host (Optional[str]): + The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or application default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for the grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure a mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + always_use_jwt_access (Optional[bool]): Whether self signed JWT should + be used for service account credentials. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. 
+ """ + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials + + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + + # The base transport sets the host, credentials and scopes + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + client_info=client_info, + always_use_jwt_access=always_use_jwt_access, + api_audience=api_audience, + ) + self._channel_pool = [] + for i in range(pool_size): + new_channel = type(self).create_channel( + self._host, + # use the credentials which are saved + credentials=self._credentials, + # Set ``credentials_file`` to ``None`` here as + # the credentials that we saved earlier should be used. + credentials_file=None, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + self._channel_pool.append(new_channel) + + # Wrap messages. This must be done after self._channel_pool is populated + self._prep_wrapped_messages(client_info) + + def next_channel(self) -> aio.Channel: + """Returns the next channel in the round robin pool.""" + # Return the channel from cache. + channel = self._channel_pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._channel_pool) + return channel + + @property + def read_rows( + self, + ) -> Callable[[bigtable.ReadRowsRequest], Awaitable[bigtable.ReadRowsResponse]]: + r"""Return a callable for the read rows method over gRPC. + + Streams back the contents of all requested rows in + key order, optionally applying the same Reader filter to + each. Depending on their size, rows and cells may be + broken up across multiple responses, but atomicity of + each row will still be preserved. See the + ReadRowsResponse documentation for details. + + Returns: + Callable[[~.ReadRowsRequest], + Awaitable[~.ReadRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
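+        # Each call rotates to the next channel in the pool (round-robin) and
+        # caches the stub per (channel, method) pair, so repeated calls over
+        # the same channel reuse an existing stub.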
+ next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "read_rows") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_stream( + "/google.bigtable.v2.Bigtable/ReadRows", + request_serializer=bigtable.ReadRowsRequest.serialize, + response_deserializer=bigtable.ReadRowsResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def sample_row_keys( + self, + ) -> Callable[ + [bigtable.SampleRowKeysRequest], Awaitable[bigtable.SampleRowKeysResponse] + ]: + r"""Return a callable for the sample row keys method over gRPC. + + Returns a sample of row keys in the table. The + returned row keys will delimit contiguous sections of + the table of approximately equal size, which can be used + to break up the data for distributed tasks like + mapreduces. + + Returns: + Callable[[~.SampleRowKeysRequest], + Awaitable[~.SampleRowKeysResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "sample_row_keys") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_stream( + "/google.bigtable.v2.Bigtable/SampleRowKeys", + request_serializer=bigtable.SampleRowKeysRequest.serialize, + response_deserializer=bigtable.SampleRowKeysResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def mutate_row( + self, + ) -> Callable[[bigtable.MutateRowRequest], Awaitable[bigtable.MutateRowResponse]]: + r"""Return a callable for the mutate row method over gRPC. + + Mutates a row atomically. Cells already present in the row are + left unchanged unless explicitly changed by ``mutation``. + + Returns: + Callable[[~.MutateRowRequest], + Awaitable[~.MutateRowResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "mutate_row") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_unary( + "/google.bigtable.v2.Bigtable/MutateRow", + request_serializer=bigtable.MutateRowRequest.serialize, + response_deserializer=bigtable.MutateRowResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def mutate_rows( + self, + ) -> Callable[[bigtable.MutateRowsRequest], Awaitable[bigtable.MutateRowsResponse]]: + r"""Return a callable for the mutate rows method over gRPC. + + Mutates multiple rows in a batch. Each individual row + is mutated atomically as in MutateRow, but the entire + batch is not executed atomically. + + Returns: + Callable[[~.MutateRowsRequest], + Awaitable[~.MutateRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "mutate_rows") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_stream( + "/google.bigtable.v2.Bigtable/MutateRows", + request_serializer=bigtable.MutateRowsRequest.serialize, + response_deserializer=bigtable.MutateRowsResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def check_and_mutate_row( + self, + ) -> Callable[ + [bigtable.CheckAndMutateRowRequest], + Awaitable[bigtable.CheckAndMutateRowResponse], + ]: + r"""Return a callable for the check and mutate row method over gRPC. + + Mutates a row atomically based on the output of a + predicate Reader filter. + + Returns: + Callable[[~.CheckAndMutateRowRequest], + Awaitable[~.CheckAndMutateRowResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "check_and_mutate_row") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_unary( + "/google.bigtable.v2.Bigtable/CheckAndMutateRow", + request_serializer=bigtable.CheckAndMutateRowRequest.serialize, + response_deserializer=bigtable.CheckAndMutateRowResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def ping_and_warm( + self, + ) -> Callable[ + [bigtable.PingAndWarmRequest], Awaitable[bigtable.PingAndWarmResponse] + ]: + r"""Return a callable for the ping and warm method over gRPC. + + Warm up associated instance metadata for this + connection. This call is not required but may be useful + for connection keep-alive. + + Returns: + Callable[[~.PingAndWarmRequest], + Awaitable[~.PingAndWarmResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "ping_and_warm") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_unary( + "/google.bigtable.v2.Bigtable/PingAndWarm", + request_serializer=bigtable.PingAndWarmRequest.serialize, + response_deserializer=bigtable.PingAndWarmResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def read_modify_write_row( + self, + ) -> Callable[ + [bigtable.ReadModifyWriteRowRequest], + Awaitable[bigtable.ReadModifyWriteRowResponse], + ]: + r"""Return a callable for the read modify write row method over gRPC. + + Modifies a row atomically on the server. The method + reads the latest existing timestamp and value from the + specified columns and writes a new entry based on + pre-defined read/modify/write rules. The new value for + the timestamp is the greater of the existing timestamp + or the current server time. The method returns the new + contents of all modified cells. + + Returns: + Callable[[~.ReadModifyWriteRowRequest], + Awaitable[~.ReadModifyWriteRowResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. 
+ # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "read_modify_write_row") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_unary( + "/google.bigtable.v2.Bigtable/ReadModifyWriteRow", + request_serializer=bigtable.ReadModifyWriteRowRequest.serialize, + response_deserializer=bigtable.ReadModifyWriteRowResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def generate_initial_change_stream_partitions( + self, + ) -> Callable[ + [bigtable.GenerateInitialChangeStreamPartitionsRequest], + Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], + ]: + r"""Return a callable for the generate initial change stream + partitions method over gRPC. + + NOTE: This API is intended to be used by Apache Beam BigtableIO. + Returns the current list of partitions that make up the table's + change stream. The union of partitions will cover the entire + keyspace. Partitions can be read with ``ReadChangeStream``. + + Returns: + Callable[[~.GenerateInitialChangeStreamPartitionsRequest], + Awaitable[~.GenerateInitialChangeStreamPartitionsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "generate_initial_change_stream_partitions") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_stream( + "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", + request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, + response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, + ) + return self._stubs[stub_key] + + @property + def read_change_stream( + self, + ) -> Callable[ + [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] + ]: + r"""Return a callable for the read change stream method over gRPC. + + NOTE: This API is intended to be used by Apache Beam + BigtableIO. Reads changes from a table's change stream. + Changes will reflect both user-initiated mutations and + mutations that are caused by garbage collection. + + Returns: + Callable[[~.ReadChangeStreamRequest], + Awaitable[~.ReadChangeStreamResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ next_channel = self.next_channel() + print(f"USING CHANNEL: {self._next_idx}") + stub_key = (next_channel, "read_change_stream") + if stub_key not in self._stubs: + self._stubs[stub_key] = next_channel.unary_stream( + "/google.bigtable.v2.Bigtable/ReadChangeStream", + request_serializer=bigtable.ReadChangeStreamRequest.serialize, + response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, + ) + return self._stubs[stub_key] + + def close(self): + for channel in self._channel_pool: + channel.close() + + +__all__ = ("PooledBigtableGrpcAsyncIOTransport",) From be3de7a10a864d33c2bf5c652c96896a46700858 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 14:22:08 -0800 Subject: [PATCH 020/349] added functions to get and replace channels in pool --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 49 ++++++++++++++++++- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 8e3637ed7..97f1eb643 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 8e3637ed74f99f09d1e5941531a8836459ef1df3 +Subproject commit 97f1eb643260b62311ae2d9a1b555ac2d3661ed2 diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 5b1b90dcd..a618dcb9e 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -197,6 +197,7 @@ def __init__( always_use_jwt_access=always_use_jwt_access, api_audience=api_audience, ) + self._quota_project_id = quota_project_id self._channel_pool = [] for i in range(pool_size): new_channel = type(self).create_channel( @@ -208,7 +209,7 @@ def __init__( credentials_file=None, scopes=self._scopes, ssl_credentials=self._ssl_channel_credentials, - quota_project_id=quota_project_id, + quota_project_id=self._quota_project_id, options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), @@ -226,6 +227,52 @@ def next_channel(self) -> aio.Channel: self._next_idx = (self._next_idx + 1) % len(self._channel_pool) return channel + def get_channel(self, channel_idx) -> aio.Channel: + """Returns the a specified channel from the pool. + + Args: + channel_idx(int): the requested channel index + """ + # Return the channel from cache. + return self._channel_pool[self._next_idx] + + async def replace_channel( + self, channel_idx, grace=None, new_channel=None + ) -> aio.Channel: + """ + Immediately closes a channel in the pool, and replaces it with a new one. + Returns the newly created channel + + Args: + channel_idx(int): the channel index in the pool to replace + grace(Optional[float]): The time to wait until all active RPCs are + finished. If a grace period is not specified (by passing None for + grace), all existing RPCs are cancelled immediately. + new_channel(asyncio.Channel): a new channel to insert into the pool + at `channel_idx`. If `None`, a new channel will be created. 
+ """ + if new_channel is None: + new_channel = self.create_channel( + self._host, + credentials=self._credentials, + credentials_file=None, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=self._quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + old_channel = self._channel_pool[channel_idx] + self._channel_pool[channel_idx] = new_channel + await old_channel.close(grace=grace) + # invalidate stubs + for stub_channel, stub_func in self._stubs.keys(): + if stub_channel == old_channel: + del self._stubs[(stub_channel, stub_func)] + return new_channel + @property def read_rows( self, From f6c7f361c98a8adc261d7bd680d42d4054767d92 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 14:45:20 -0800 Subject: [PATCH 021/349] added client init implementation --- google/cloud/bigtable/client.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 93379cbb0..41aaf70ee 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,11 +15,11 @@ from __future__ import annotations -from typing import Any, AsyncIterable, TYPE_CHECKING +from typing import cast, Any, AsyncIterable, Optional, TYPE_CHECKING +from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.client import ClientWithProject - import google.auth.credentials if TYPE_CHECKING: @@ -49,7 +49,21 @@ def __init__( Args: metadata: a list of metadata headers to be attached to all calls with this client """ - pass + super(BigtableDataClient, self).__init__( + project=project, + credentials=credentials, + client_options=client_options, + ) + if type(client_options) is dict: + client_options = google.api_core.client_options.from_dict(client_options) + client_options = cast( + Optional["google.api_core.client_options.ClientOptions"], client_options + ) + self._gapic_client = BigtableAsyncClient( + credentials=credentials, + transport="pooled_grpc_asyncio", + client_options=client_options, + ) def get_table( self, instance_id: str, table_id: str, app_profile_id: str | None = None From 44e76c1200ee0edecc91e560ae635a466db16e52 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 15:59:07 -0800 Subject: [PATCH 022/349] added channel management to table object --- gapic-generator-fork | 2 +- google/cloud/bigtable/client.py | 56 +++++++++++++++++-- .../transports/pooled_grpc_asyncio.py | 3 +- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 97f1eb643..8d15882f8 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 97f1eb643260b62311ae2d9a1b555ac2d3661ed2 +Subproject commit 8d15882f8fab48499bf3a18d7b505f38c3ee12d7 diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 41aaf70ee..d4090aa0b 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -17,7 +17,13 @@ from typing import cast, Any, AsyncIterable, Optional, TYPE_CHECKING +import asyncio +import grpc + from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient +from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, +) from google.cloud.client import ClientWithProject import google.auth.credentials @@ -64,11 +70,25 @@ def 
__init__( transport="pooled_grpc_asyncio", client_options=client_options, ) + self.transport: PooledBigtableGrpcAsyncIOTransport = cast( + PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport + ) - def get_table( - self, instance_id: str, table_id: str, app_profile_id: str | None = None + async def get_table( + self, + instance_id: str, + table_id: str, + app_profile_id: str | None = None, + manage_channels: bool = True, ) -> Table: - return Table(self, instance_id, table_id, app_profile_id) + table = Table(self, instance_id, table_id, app_profile_id) + if manage_channels: + for channel_idx in range(self.transport.pool_size): + channel = self.transport.get_channel(channel_idx) + await table._ping_and_warm_channel(channel) + refresh_task = asyncio.create_task(table._manage_channel(channel_idx)) + table._channel_refresh_tasks.append(refresh_task) + return table class Table: @@ -86,7 +106,35 @@ def __init__( table_id: str, app_profile_id: str | None = None, ): - raise NotImplementedError + self.client = client + self.instance_id = instance_id + self.table_id = table_id + self.app_profile_id = app_profile_id + self._channel_refresh_tasks: list[asyncio.Task[None]] = [] + + async def _manage_channel( + self, channel_idx: int, refresh_interval: int | float = 60 * 45 + ) -> None: + MAX_REFRESH_TIME = 60 * 60 + while True: + await asyncio.sleep(refresh_interval) + new_channel = await self.client.transport.replace_channel( + channel_idx, MAX_REFRESH_TIME - refresh_interval + ) + # warm caches on new client + await self._ping_and_warm_channel(new_channel) + + async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: + ping_rpc = channel.unary_unary( + "/google.bigtable.v2.Bigtable/PingAndWarmChannel" + ) + await ping_rpc( + { + "name": self.client._gapic_client.instance_path( + self.client.project, self.instance_id + ) + } + ) async def read_rows_stream( self, diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index a618dcb9e..b41e437a7 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -198,6 +198,7 @@ def __init__( api_audience=api_audience, ) self._quota_project_id = quota_project_id + self.pool_size = pool_size self._channel_pool = [] for i in range(pool_size): new_channel = type(self).create_channel( @@ -248,7 +249,7 @@ async def replace_channel( grace(Optional[float]): The time to wait until all active RPCs are finished. If a grace period is not specified (by passing None for grace), all existing RPCs are cancelled immediately. - new_channel(asyncio.Channel): a new channel to insert into the pool + new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. 
""" if new_channel is None: From c4d537e0b74e3d7f0d77ee041ff8e3a865e51c20 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 16:50:59 -0800 Subject: [PATCH 023/349] refactoring channel management --- google/cloud/bigtable/client.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d4090aa0b..39f7ec8c0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -19,6 +19,7 @@ import asyncio import grpc +import time from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -84,8 +85,6 @@ async def get_table( table = Table(self, instance_id, table_id, app_profile_id) if manage_channels: for channel_idx in range(self.transport.pool_size): - channel = self.transport.get_channel(channel_idx) - await table._ping_and_warm_channel(channel) refresh_task = asyncio.create_task(table._manage_channel(channel_idx)) table._channel_refresh_tasks.append(refresh_task) return table @@ -113,16 +112,24 @@ def __init__( self._channel_refresh_tasks: list[asyncio.Task[None]] = [] async def _manage_channel( - self, channel_idx: int, refresh_interval: int | float = 60 * 45 + self, + channel_idx: int, + refresh_interval: float = 60 * 45, + grace_period: float = 60 * 15, ) -> None: - MAX_REFRESH_TIME = 60 * 60 + channel = self.client.transport.get_channel(channel_idx) + start_timestamp = time.time() while True: - await asyncio.sleep(refresh_interval) - new_channel = await self.client.transport.replace_channel( - channel_idx, MAX_REFRESH_TIME - refresh_interval + # warm caches on new channel + await self._ping_and_warm_channel(channel) + # let channel serve rpcs until expirary + next_sleep = refresh_interval - (time.time() - start_timestamp) + await asyncio.sleep(next_sleep) + start_timestamp = time.time() + # cycle channel out of use, with long grace window + channel = await self.client.transport.replace_channel( + channel_idx, grace_period ) - # warm caches on new client - await self._ping_and_warm_channel(new_channel) async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: ping_rpc = channel.unary_unary( From 99a49a42ebcba6d779a65a9f3365977d0bb72b01 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 17:05:59 -0800 Subject: [PATCH 024/349] ping new channel before replacement --- google/cloud/bigtable/client.py | 49 +++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 39f7ec8c0..5338057dc 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -117,21 +117,54 @@ async def _manage_channel( refresh_interval: float = 60 * 45, grace_period: float = 60 * 15, ) -> None: + """ + Warms and periodically refreshes an internal grpc channel used for requests + + The backend will automatically close channels after 60 minutes, so + `refresh_interval` + `grace_period` should be < 60 minutes + + Args: + channel_idx: index of the channel in the transport's channel pool + refresh_interval: interval before initiating refresh process in seconds + grace_period: time to allow previous channel to serve existing + requests before closing, in seconds + """ + # warm the current channel immidiately channel = self.client.transport.get_channel(channel_idx) - start_timestamp = time.time() + await 
self._ping_and_warm_channel(channel) + next_sleep = refresh_interval + # continuously refrech the channel every `refresh_interval` seconds while True: - # warm caches on new channel - await self._ping_and_warm_channel(channel) - # let channel serve rpcs until expirary - next_sleep = refresh_interval - (time.time() - start_timestamp) await asyncio.sleep(next_sleep) + # prepare new channel for use + new_channel = self.client.transport.create_channel( + self.client.transport._host, + credentials=self.client.transport._credentials, + credentials_file=None, + scopes=self.client.transport._scopes, + ssl_credentials=self.client.transport._ssl_channel_credentials, + quota_project_id=self.client.transport._quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + await self._ping_and_warm_channel(channel) + # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - # cycle channel out of use, with long grace window - channel = await self.client.transport.replace_channel( - channel_idx, grace_period + await self.client.transport.replace_channel( + channel_idx, grace_period, new_channel ) + # subtract the time spent waiting for the channel to be replaced + next_sleep = refresh_interval - (time.time() - start_timestamp) async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: + """ + Prepares the backend for requests on a channel + + Args: + channel: grpc channel to ping + """ ping_rpc = channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarmChannel" ) From bd4fb5e9dab59690fe3c6f8e72a5662152fa7bdf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 6 Mar 2023 17:13:13 -0800 Subject: [PATCH 025/349] made channel pool public --- gapic-generator-fork | 2 +- google/cloud/bigtable/client.py | 4 +- .../transports/pooled_grpc_asyncio.py | 37 +++++++------------ 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 8d15882f8..03fa6f3c5 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 8d15882f8fab48499bf3a18d7b505f38c3ee12d7 +Subproject commit 03fa6f3c5ebac267ba97ecd9616f146bed1f1c4b diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5338057dc..8c265f488 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -84,7 +84,7 @@ async def get_table( ) -> Table: table = Table(self, instance_id, table_id, app_profile_id) if manage_channels: - for channel_idx in range(self.transport.pool_size): + for channel_idx in range(len(self.transport.channel_pool)): refresh_task = asyncio.create_task(table._manage_channel(channel_idx)) table._channel_refresh_tasks.append(refresh_task) return table @@ -130,7 +130,7 @@ async def _manage_channel( requests before closing, in seconds """ # warm the current channel immidiately - channel = self.client.transport.get_channel(channel_idx) + channel = self.client.transport.channel_pool[channel_idx] await self._ping_and_warm_channel(channel) next_sleep = refresh_interval # continuously refrech the channel every `refresh_interval` seconds diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index b41e437a7..bfccfaae3 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ 
b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -46,10 +46,6 @@ class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): when making requests. Channels are rotated in a round-robin fashion. """ - _channel_pool: List[aio.Channel] = [] - _stubs: Dict[Tuple[aio.Channel, str], Callable] = {} - _next_idx: int = 0 - @classmethod def create_channel( cls, @@ -159,7 +155,8 @@ def __init__( and ``credentials_file`` are passed. """ self._ssl_channel_credentials = ssl_channel_credentials - self._stubs: Dict[str, Callable] = {} + self._stubs: Dict[Tuple[aio.Channel, str], Callable] = {} + self._next_idx = 0 if api_mtls_endpoint: warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) @@ -198,8 +195,7 @@ def __init__( api_audience=api_audience, ) self._quota_project_id = quota_project_id - self.pool_size = pool_size - self._channel_pool = [] + self.channel_pool: List[aio.Channel] = [] for i in range(pool_size): new_channel = type(self).create_channel( self._host, @@ -216,33 +212,26 @@ def __init__( ("grpc.max_receive_message_length", -1), ], ) - self._channel_pool.append(new_channel) + self.channel_pool.append(new_channel) - # Wrap messages. This must be done after self._channel_pool is populated + # Wrap messages. This must be done after self.channel_pool is populated self._prep_wrapped_messages(client_info) def next_channel(self) -> aio.Channel: """Returns the next channel in the round robin pool.""" # Return the channel from cache. - channel = self._channel_pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self._channel_pool) + channel = self.channel_pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self.channel_pool) return channel - def get_channel(self, channel_idx) -> aio.Channel: - """Returns the a specified channel from the pool. - - Args: - channel_idx(int): the requested channel index - """ - # Return the channel from cache. - return self._channel_pool[self._next_idx] - async def replace_channel( self, channel_idx, grace=None, new_channel=None ) -> aio.Channel: """ - Immediately closes a channel in the pool, and replaces it with a new one. - Returns the newly created channel + Replaces a channel in the pool with a fresh one. 
+ + The `new_channel` will start processing new requests immidiately, + but the old channel will continue serving existing clients for `grace` seconds Args: channel_idx(int): the channel index in the pool to replace @@ -265,8 +254,8 @@ async def replace_channel( ("grpc.max_receive_message_length", -1), ], ) - old_channel = self._channel_pool[channel_idx] - self._channel_pool[channel_idx] = new_channel + old_channel = self.channel_pool[channel_idx] + self.channel_pool[channel_idx] = new_channel await old_channel.close(grace=grace) # invalidate stubs for stub_channel, stub_func in self._stubs.keys(): From 6b13c29d47a83d7977ba09e5d6c589cf3f0f6aa2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 11:27:03 -0800 Subject: [PATCH 026/349] moved channel refresh logic into shared client --- google/cloud/bigtable/client.py | 147 +++++++++++++++++++------------- 1 file changed, 90 insertions(+), 57 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 8c265f488..5ac303305 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -74,42 +74,27 @@ def __init__( self.transport: PooledBigtableGrpcAsyncIOTransport = cast( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) + self._active_instances: set[str] = set() + # background tasks will be started when an instance is registered + # with the client in `get_table` + self._channel_refresh_tasks: list[asyncio.Task[None]] = [] - async def get_table( - self, - instance_id: str, - table_id: str, - app_profile_id: str | None = None, - manage_channels: bool = True, - ) -> Table: - table = Table(self, instance_id, table_id, app_profile_id) - if manage_channels: - for channel_idx in range(len(self.transport.channel_pool)): - refresh_task = asyncio.create_task(table._manage_channel(channel_idx)) - table._channel_refresh_tasks.append(refresh_task) - return table - - -class Table: - """ - Main Data API surface + async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: + """ + Prepares the backend for requests on a channel - Table object maintains instance_id, table_id, and app_profile_id context, and passes them with - each call - """ + Pings each Bigtable instance registered in `_active_instances` on the client - def __init__( - self, - client: BigtableDataClient, - instance_id: str, - table_id: str, - app_profile_id: str | None = None, - ): - self.client = client - self.instance_id = instance_id - self.table_id = table_id - self.app_profile_id = app_profile_id - self._channel_refresh_tasks: list[asyncio.Task[None]] = [] + Args: + channel: grpc channel to ping + Returns: + - squence of results or exceptions from the ping requests + """ + ping_rpc = channel.unary_unary( + "/google.bigtable.v2.Bigtable/PingAndWarmChannel" + ) + tasks = [ping_rpc({"name": n}) for n in self._active_instances] + return await asyncio.gather(*tasks, return_exceptions=True) async def _manage_channel( self, @@ -118,11 +103,13 @@ async def _manage_channel( grace_period: float = 60 * 15, ) -> None: """ - Warms and periodically refreshes an internal grpc channel used for requests + Background coroutine that periodically refreshes and warms a grpc channel The backend will automatically close channels after 60 minutes, so `refresh_interval` + `grace_period` should be < 60 minutes + Runs continuously until the client is closed + Args: channel_idx: index of the channel in the transport's channel pool refresh_interval: interval before initiating refresh process in seconds @@ 
-130,20 +117,20 @@ async def _manage_channel( requests before closing, in seconds """ # warm the current channel immidiately - channel = self.client.transport.channel_pool[channel_idx] + channel = self.transport.channel_pool[channel_idx] await self._ping_and_warm_channel(channel) next_sleep = refresh_interval # continuously refrech the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) # prepare new channel for use - new_channel = self.client.transport.create_channel( - self.client.transport._host, - credentials=self.client.transport._credentials, + new_channel = self.transport.create_channel( + self.transport._host, + credentials=self.transport._credentials, credentials_file=None, - scopes=self.client.transport._scopes, - ssl_credentials=self.client.transport._ssl_channel_credentials, - quota_project_id=self.client.transport._quota_project_id, + scopes=self.transport._scopes, + ssl_credentials=self.transport._ssl_channel_credentials, + quota_project_id=self.transport._quota_project_id, options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), @@ -152,29 +139,75 @@ async def _manage_channel( await self._ping_and_warm_channel(channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - await self.client.transport.replace_channel( - channel_idx, grace_period, new_channel - ) + await self.transport.replace_channel(channel_idx, grace_period, new_channel) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) - async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: + async def register_instance(self, instance_id: str): """ - Prepares the backend for requests on a channel + Registers an instance with the client + + The client will periodically refresh grpc channel pool used to make + requests, and new channels will be warmed for each registered instance + + Channels will not be refreshed unless at least one instance is registered + """ + instance_name = self._gapic_client.instance_path(self.project, instance_id) + self._active_instances.add(instance_name) + # if refresh tasks aren't active, start them as background tasks + if not self._channel_refresh_tasks: + for channel_idx in range(len(self.transport.channel_pool)): + refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) + self._channel_refresh_tasks.append(refresh_task) + + async def remove_instance_registration(self, instance_id: str): + """ + Removes an instance from the client's registered instances, to prevent + warming new channels for the instance + """ + instance_name = self._gapic_client.instance_path(self.project, instance_id) + self._active_instances.remove(instance_name) + + async def get_table( + self, + instance_id: str, + table_id: str, + app_profile_id: str | None = None, + *, + register_instance: bool = True, + ) -> Table: + """ + Returns a table instance for making data API requests Args: - channel: grpc channel to ping + register_instance: if True, the client will call `register_instance` on + the `instance_id`, to periodically warm and refresh the channel + pool for the specified instance """ - ping_rpc = channel.unary_unary( - "/google.bigtable.v2.Bigtable/PingAndWarmChannel" - ) - await ping_rpc( - { - "name": self.client._gapic_client.instance_path( - self.client.project, self.instance_id - ) - } - ) + if register_instance: + await self.register_instance(instance_id) + return Table(self, instance_id, 
table_id, app_profile_id) + + +class Table: + """ + Main Data API surface + + Table object maintains instance_id, table_id, and app_profile_id context, and passes them with + each call + """ + + def __init__( + self, + client: BigtableDataClient, + instance_id: str, + table_id: str, + app_profile_id: str | None = None, + ): + self.client = client + self.instance_id = instance_id + self.table_id = table_id + self.app_profile_id = app_profile_id async def read_rows_stream( self, From 60841c505ca9362211485b7b8485946101407c88 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 12:34:49 -0800 Subject: [PATCH 027/349] call ping and warm on register_instance --- google/cloud/bigtable/client.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5ac303305..626b3343c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -145,7 +145,8 @@ async def _manage_channel( async def register_instance(self, instance_id: str): """ - Registers an instance with the client + Registers an instance with the client, and warms the channel pool + for the instance The client will periodically refresh grpc channel pool used to make requests, and new channels will be warmed for each registered instance @@ -154,8 +155,13 @@ async def register_instance(self, instance_id: str): """ instance_name = self._gapic_client.instance_path(self.project, instance_id) self._active_instances.add(instance_name) - # if refresh tasks aren't active, start them as background tasks - if not self._channel_refresh_tasks: + if self._channel_refresh_tasks: + # refresh tasks already running + # call ping and warm on all existing channels + for channel in self.transport.channel_pool: + await self._ping_and_warm_channel(channel) + else: + # refresh tasks aren't active. 
start them as background tasks for channel_idx in range(len(self.transport.channel_pool)): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) self._channel_refresh_tasks.append(refresh_task) From 0a3086c5a85ec85d5cf626c35444121eac6b2448 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 12:40:03 -0800 Subject: [PATCH 028/349] fixed typo --- google/cloud/bigtable/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 626b3343c..09462ff69 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -116,11 +116,11 @@ async def _manage_channel( grace_period: time to allow previous channel to serve existing requests before closing, in seconds """ - # warm the current channel immidiately + # warm the current channel immediately channel = self.transport.channel_pool[channel_idx] await self._ping_and_warm_channel(channel) next_sleep = refresh_interval - # continuously refrech the channel every `refresh_interval` seconds + # continuously refresh the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) # prepare new channel for use From 08c3c4260a83464f8c200d7e1eb2c014950ea698 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 12:56:07 -0800 Subject: [PATCH 029/349] renamed function --- google/cloud/bigtable/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 09462ff69..f5e5e0156 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -79,7 +79,7 @@ def __init__( # with the client in `get_table` self._channel_refresh_tasks: list[asyncio.Task[None]] = [] - async def _ping_and_warm_channel(self, channel: grpc.aio.Channel) -> None: + async def _ping_and_warm_instances(self, channel: grpc.aio.Channel) -> None: """ Prepares the backend for requests on a channel @@ -118,7 +118,7 @@ async def _manage_channel( """ # warm the current channel immediately channel = self.transport.channel_pool[channel_idx] - await self._ping_and_warm_channel(channel) + await self._ping_and_warm_instances(channel) next_sleep = refresh_interval # continuously refresh the channel every `refresh_interval` seconds while True: @@ -136,7 +136,7 @@ async def _manage_channel( ("grpc.max_receive_message_length", -1), ], ) - await self._ping_and_warm_channel(channel) + await self._ping_and_warm_instances(channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() await self.transport.replace_channel(channel_idx, grace_period, new_channel) @@ -159,7 +159,7 @@ async def register_instance(self, instance_id: str): # refresh tasks already running # call ping and warm on all existing channels for channel in self.transport.channel_pool: - await self._ping_and_warm_channel(channel) + await self._ping_and_warm_instances(channel) else: # refresh tasks aren't active. 
start them as background tasks for channel_idx in range(len(self.transport.channel_pool)): From 05e10cd99602417c0963edd9ebb1ff9130833a5b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 13:42:54 -0800 Subject: [PATCH 030/349] added comments --- google/cloud/bigtable/client.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 93379cbb0..184c51c2e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -37,6 +37,7 @@ def __init__( self, *, project: str | None = None, + pool_size: int = 3, credentials: google.auth.credentials.Credentials | None = None, client_options: dict[str, Any] | "google.api_core.client_options.ClientOptions" @@ -44,9 +45,22 @@ def __init__( metadata: list[tuple[str, str]] | None = None, ): """ - Create a client instance + Create a client instance for the Bigtable Data API Args: + project: the project which the client acts on behalf of. + If not passed, falls back to the default inferred + from the environment. + pool_size: The number of grpc channels to maintain + in the internal channel pool. + credentials: + Thehe OAuth2 Credentials to use for this + client. If not passed (and if no ``_http`` object is + passed), falls back to the default inferred from the + environment. + client_options (Optional[Union[dict, google.api_core.client_options.ClientOptions]]): + Client options used to set user options + on the client. API Endpoint should be set through client_options. metadata: a list of metadata headers to be attached to all calls with this client """ pass @@ -54,6 +68,15 @@ def __init__( def get_table( self, instance_id: str, table_id: str, app_profile_id: str | None = None ) -> Table: + """ + Return a Table instance to make API requests for a specific table. + + Args: + instance_id: The ID of the instance that owns the table. + table_id: The ID of the table. + app_profile_id: (Optional) The app profile to associate with requests. + https://cloud.google.com/bigtable/docs/app-profiles + """ return Table(self, instance_id, table_id, app_profile_id) From 895093f976405dcbe0c95504a65ab458d9fd223b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 14:00:21 -0800 Subject: [PATCH 031/349] removed TypeAlis annotation --- google/cloud/bigtable/__init__.py | 3 +-- google/cloud/bigtable/row_response.py | 9 ++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 91cec3ffe..daa562c0c 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -14,7 +14,6 @@ # limitations under the License. # -from typing_extensions import TypeAlias from typing import List, Tuple from google.cloud.bigtable import gapic_version as package_version @@ -35,7 +34,7 @@ from google.cloud.bigtable.mutations import DeleteAllFromRow # Type alias for the output of sample_keys -RowKeySamples: TypeAlias = List[Tuple[bytes, int]] +RowKeySamples = List[Tuple[bytes, int]] __version__: str = package_version.__version__ diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index 6d4248925..cc48ce87e 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -16,13 +16,12 @@ from collections import OrderedDict from collections import Sequence -from typing_extensions import TypeAlias # Type aliases used internally for readability. 
-row_key: TypeAlias = bytes -family_id: TypeAlias = str -qualifier: TypeAlias = bytes -row_value: TypeAlias = bytes +row_key = bytes +family_id = str +qualifier = bytes +row_value = bytes class RowResponse(Sequence["CellResponse"]): From 1684274f5f57e9f44874b341305d9d854d93851d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 14:12:32 -0800 Subject: [PATCH 032/349] changed sequence import --- google/cloud/bigtable/row_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index cc48ce87e..be6d8c505 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -15,7 +15,7 @@ from __future__ import annotations from collections import OrderedDict -from collections import Sequence +from typing import Sequence # Type aliases used internally for readability. row_key = bytes From 75d276ac6043cf06471d3d81710a964eb55d576f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 14:23:09 -0800 Subject: [PATCH 033/349] updated warning tests --- tests/unit/v2_client/test_client.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/unit/v2_client/test_client.py b/tests/unit/v2_client/test_client.py index fe42574a0..9deac6a25 100644 --- a/tests/unit/v2_client/test_client.py +++ b/tests/unit/v2_client/test_client.py @@ -108,16 +108,20 @@ def _make_client(*args, **kwargs): @mock.patch("os.environ", {}) def test_client_constructor_defaults(): + import warnings from google.api_core import client_info from google.cloud.bigtable.deprecated import __version__ from google.cloud.bigtable.deprecated.client import DATA_SCOPE credentials = _make_credentials() - with mock.patch("google.auth.default") as mocked: - mocked.return_value = credentials, PROJECT - client = _make_client() + with warnings.catch_warnings(record=True) as warned: + with mock.patch("google.auth.default") as mocked: + mocked.return_value = credentials, PROJECT + client = _make_client() + # warn about client deprecation + assert len(warned) == 1 assert client.project == PROJECT assert client._credentials is credentials.with_scopes.return_value assert not client._read_only @@ -147,7 +151,8 @@ def test_client_constructor_explicit(): channel=mock.sentinel.channel, ) - assert len(warned) == 1 + # deprecationw arnning for channel and Client deprecation + assert len(warned) == 2 assert client.project == PROJECT assert client._credentials is credentials.with_scopes.return_value From 92752c01e8eae673ecd8fe0f3f91b48c9dd14861 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 14:30:13 -0800 Subject: [PATCH 034/349] updated doc snippet imports --- docs/snippets.py | 118 +++++++++++++++---------------- docs/snippets_table.py | 154 ++++++++++++++++++++--------------------- 2 files changed, 136 insertions(+), 136 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 1d93fdf12..084f10270 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -16,7 +16,7 @@ """Testable usage examples for Google Cloud Bigtable API wrapper Each example function takes a ``client`` argument (which must be an instance -of :class:`google.cloud.bigtable.client.Client`) and uses it to perform a task +of :class:`google.cloud.bigtable.deprecated.client.Client`) and uses it to perform a task with the API. 
To facilitate running the examples as system tests, each example is also passed @@ -40,8 +40,8 @@ from test_utils.retry import RetryErrors from google.cloud._helpers import UTC -from google.cloud.bigtable import Client -from google.cloud.bigtable import enums +from google.cloud.bigtable.deprecated import Client +from google.cloud.bigtable.deprecated import enums UNIQUE_SUFFIX = unique_resource_id("-") @@ -110,8 +110,8 @@ def teardown_module(): def test_bigtable_create_instance(): # [START bigtable_api_create_prod_instance] - from google.cloud.bigtable import Client - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import enums my_instance_id = "inst-my-" + UNIQUE_SUFFIX my_cluster_id = "clus-my-" + UNIQUE_SUFFIX @@ -144,8 +144,8 @@ def test_bigtable_create_instance(): def test_bigtable_create_additional_cluster(): # [START bigtable_api_create_cluster] - from google.cloud.bigtable import Client - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import enums # Assuming that there is an existing instance with `INSTANCE_ID` # on the server already. @@ -181,8 +181,8 @@ def test_bigtable_create_reload_delete_app_profile(): import re # [START bigtable_api_create_app_profile] - from google.cloud.bigtable import Client - from google.cloud.bigtable import enums + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import enums routing_policy_type = enums.RoutingPolicyType.ANY @@ -202,7 +202,7 @@ def test_bigtable_create_reload_delete_app_profile(): # [END bigtable_api_create_app_profile] # [START bigtable_api_app_profile_name] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -219,7 +219,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert _profile_name_re.match(app_profile_name) # [START bigtable_api_app_profile_exists] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -230,7 +230,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile_exists # [START bigtable_api_reload_app_profile] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -241,7 +241,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile.routing_policy_type == ROUTING_POLICY_TYPE # [START bigtable_api_update_app_profile] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -255,7 +255,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile.description == description # [START bigtable_api_delete_app_profile] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -269,7 +269,7 @@ def test_bigtable_create_reload_delete_app_profile(): def test_bigtable_list_instances(): # [START bigtable_api_list_instances] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) (instances_list, failed_locations_list) = 
client.list_instances() @@ -280,7 +280,7 @@ def test_bigtable_list_instances(): def test_bigtable_list_clusters_on_instance(): # [START bigtable_api_list_clusters_on_instance] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -292,7 +292,7 @@ def test_bigtable_list_clusters_on_instance(): def test_bigtable_list_clusters_in_project(): # [START bigtable_api_list_clusters_in_project] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) (clusters_list, failed_locations_list) = client.list_clusters() @@ -309,7 +309,7 @@ def test_bigtable_list_app_profiles(): app_profile = app_profile.create(ignore_warnings=True) # [START bigtable_api_list_app_profiles] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -325,7 +325,7 @@ def test_bigtable_list_app_profiles(): def test_bigtable_instance_exists(): # [START bigtable_api_check_instance_exists] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -337,7 +337,7 @@ def test_bigtable_instance_exists(): def test_bigtable_cluster_exists(): # [START bigtable_api_check_cluster_exists] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -350,7 +350,7 @@ def test_bigtable_cluster_exists(): def test_bigtable_reload_instance(): # [START bigtable_api_reload_instance] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -362,7 +362,7 @@ def test_bigtable_reload_instance(): def test_bigtable_reload_cluster(): # [START bigtable_api_reload_cluster] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -375,7 +375,7 @@ def test_bigtable_reload_cluster(): def test_bigtable_update_instance(): # [START bigtable_api_update_instance] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -389,7 +389,7 @@ def test_bigtable_update_instance(): def test_bigtable_update_cluster(): # [START bigtable_api_update_cluster] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -403,7 +403,7 @@ def test_bigtable_update_cluster(): def test_bigtable_cluster_disable_autoscaling(): # [START bigtable_api_cluster_disable_autoscaling] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -424,8 +424,8 @@ def test_bigtable_create_table(): # [START bigtable_api_create_table] from google.api_core import exceptions from google.api_core import retry - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = 
client.instance(INSTANCE_ID) @@ -450,7 +450,7 @@ def test_bigtable_create_table(): def test_bigtable_list_tables(): # [START bigtable_api_list_tables] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -463,7 +463,7 @@ def test_bigtable_list_tables(): def test_bigtable_delete_cluster(): - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -480,7 +480,7 @@ def test_bigtable_delete_cluster(): operation.result(timeout=1000) # [START bigtable_api_delete_cluster] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -493,7 +493,7 @@ def test_bigtable_delete_cluster(): def test_bigtable_delete_instance(): - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) @@ -515,7 +515,7 @@ def test_bigtable_delete_instance(): INSTANCES_TO_DELETE.append(instance) # [START bigtable_api_delete_instance] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) @@ -531,7 +531,7 @@ def test_bigtable_delete_instance(): def test_bigtable_test_iam_permissions(): # [START bigtable_api_test_iam_permissions] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -547,9 +547,9 @@ def test_bigtable_set_iam_policy_then_get_iam_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_set_iam_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -563,7 +563,7 @@ def test_bigtable_set_iam_policy_then_get_iam_policy(): assert len(policy_latest.bigtable_admins) > 0 # [START bigtable_api_get_iam_policy] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -577,7 +577,7 @@ def test_bigtable_project_path(): import re # [START bigtable_api_project_path] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) project_path = client.project_path @@ -586,7 +586,7 @@ def test_bigtable_project_path(): def test_bigtable_table_data_client(): # [START bigtable_api_table_data_client] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) table_data_client = client.table_data_client @@ -595,7 +595,7 @@ def test_bigtable_table_data_client(): def test_bigtable_table_admin_client(): # [START bigtable_api_table_admin_client] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) table_admin_client = client.table_admin_client @@ -604,7 +604,7 @@ def test_bigtable_table_admin_client(): def test_bigtable_instance_admin_client(): # 
[START bigtable_api_instance_admin_client] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance_admin_client = client.instance_admin_client @@ -615,9 +615,9 @@ def test_bigtable_admins_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_admins_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -636,9 +636,9 @@ def test_bigtable_readers_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_readers_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_READER_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_READER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -657,9 +657,9 @@ def test_bigtable_users_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_users_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_USER_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_USER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -678,9 +678,9 @@ def test_bigtable_viewers_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_viewers_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_VIEWER_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_VIEWER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -699,7 +699,7 @@ def test_bigtable_instance_name(): import re # [START bigtable_api_instance_name] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -711,7 +711,7 @@ def test_bigtable_cluster_name(): import re # [START bigtable_api_cluster_name] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -722,7 +722,7 @@ def test_bigtable_cluster_name(): def test_bigtable_instance_from_pb(): # [START bigtable_api_instance_from_pb] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 client = Client(admin=True) @@ -741,7 +741,7 @@ def test_bigtable_instance_from_pb(): def test_bigtable_cluster_from_pb(): # [START bigtable_api_cluster_from_pb] - from 
google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 client = Client(admin=True) @@ -767,7 +767,7 @@ def test_bigtable_cluster_from_pb(): def test_bigtable_instance_state(): # [START bigtable_api_instance_state] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -779,7 +779,7 @@ def test_bigtable_instance_state(): def test_bigtable_cluster_state(): # [START bigtable_api_cluster_state] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) diff --git a/docs/snippets_table.py b/docs/snippets_table.py index f27260425..72c342907 100644 --- a/docs/snippets_table.py +++ b/docs/snippets_table.py @@ -16,7 +16,7 @@ """Testable usage examples for Google Cloud Bigtable API wrapper Each example function takes a ``client`` argument (which must be an instance -of :class:`google.cloud.bigtable.client.Client`) and uses it to perform a task +of :class:`google.cloud.bigtable.deprecated.client.Client`) and uses it to perform a task with the API. To facilitate running the examples as system tests, each example is also passed @@ -38,9 +38,9 @@ from test_utils.retry import RetryErrors from google.cloud._helpers import UTC -from google.cloud.bigtable import Client -from google.cloud.bigtable import enums -from google.cloud.bigtable import column_family +from google.cloud.bigtable.deprecated import Client +from google.cloud.bigtable.deprecated import enums +from google.cloud.bigtable.deprecated import column_family INSTANCE_ID = "snippet" + unique_resource_id("-") @@ -113,8 +113,8 @@ def teardown_module(): def test_bigtable_create_table(): # [START bigtable_api_create_table] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -143,7 +143,7 @@ def test_bigtable_sample_row_keys(): assert table_sample.exists() # [START bigtable_api_sample_row_keys] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -159,7 +159,7 @@ def test_bigtable_sample_row_keys(): def test_bigtable_write_read_drop_truncate(): # [START bigtable_api_mutate_rows] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -190,7 +190,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_mutate_rows] assert len(response) == len(rows) # [START bigtable_api_read_row] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -200,7 +200,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_read_row] assert row.row_key.decode("utf-8") == row_key # [START bigtable_api_read_rows] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -218,7 +218,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_read_rows] 
assert len(total_rows) == len(rows) # [START bigtable_api_drop_by_prefix] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -231,7 +231,7 @@ def test_bigtable_write_read_drop_truncate(): assert row.row_key.decode("utf-8") not in dropped_row_keys # [START bigtable_api_truncate_table] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -246,7 +246,7 @@ def test_bigtable_write_read_drop_truncate(): def test_bigtable_mutations_batcher(): # [START bigtable_api_mutations_batcher] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -297,7 +297,7 @@ def test_bigtable_mutations_batcher(): def test_bigtable_table_column_family(): # [START bigtable_api_table_column_family] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -311,7 +311,7 @@ def test_bigtable_table_column_family(): def test_bigtable_list_tables(): # [START bigtable_api_list_tables] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -324,7 +324,7 @@ def test_bigtable_table_name(): import re # [START bigtable_api_table_name] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -342,7 +342,7 @@ def test_bigtable_table_name(): def test_bigtable_list_column_families(): # [START bigtable_api_list_column_families] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -356,7 +356,7 @@ def test_bigtable_list_column_families(): def test_bigtable_get_cluster_states(): # [START bigtable_api_get_cluster_states] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -374,7 +374,7 @@ def test_bigtable_table_test_iam_permissions(): assert table_policy.exists # [START bigtable_api_table_test_iam_permissions] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -392,9 +392,9 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_table_set_iam_policy] - from google.cloud.bigtable import Client - from google.cloud.bigtable.policy import Policy - from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -407,7 +407,7 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): assert len(policy_latest.bigtable_admins) > 0 # [START bigtable_api_table_get_iam_policy] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = 
Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -419,7 +419,7 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): def test_bigtable_table_exists(): # [START bigtable_api_check_table_exists] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -435,7 +435,7 @@ def test_bigtable_delete_table(): assert table_del.exists() # [START bigtable_api_delete_table] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -448,7 +448,7 @@ def test_bigtable_delete_table(): def test_bigtable_table_row(): # [START bigtable_api_table_row] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -475,7 +475,7 @@ def test_bigtable_table_row(): def test_bigtable_table_append_row(): # [START bigtable_api_table_append_row] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -502,7 +502,7 @@ def test_bigtable_table_append_row(): def test_bigtable_table_direct_row(): # [START bigtable_api_table_direct_row] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -529,8 +529,8 @@ def test_bigtable_table_direct_row(): def test_bigtable_table_conditional_row(): # [START bigtable_api_table_conditional_row] - from google.cloud.bigtable import Client - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.row_filters import PassAllFilter client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -558,7 +558,7 @@ def test_bigtable_table_conditional_row(): def test_bigtable_column_family_name(): # [START bigtable_api_column_family_name] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -581,8 +581,8 @@ def test_bigtable_column_family_name(): def test_bigtable_create_update_delete_column_family(): # [START bigtable_api_create_column_family] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -598,8 +598,8 @@ def test_bigtable_create_update_delete_column_family(): assert column_families[column_family_id].gc_rule == gc_rule # [START bigtable_api_update_column_family] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -617,8 +617,8 @@ def test_bigtable_create_update_delete_column_family(): assert updated_families[column_family_id].gc_rule == max_age_rule # [START bigtable_api_delete_column_family] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from 
google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -653,8 +653,8 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): Config.TABLE.mutate_rows(rows) # [START bigtable_api_add_row_key] - from google.cloud.bigtable import Client - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -670,9 +670,9 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): assert found_row_keys == expected_row_keys # [START bigtable_api_add_row_range] - from google.cloud.bigtable import Client - from google.cloud.bigtable.row_set import RowSet - from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.deprecated.row_set import RowRange client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -688,8 +688,8 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): assert found_row_keys == expected_row_keys # [START bigtable_api_row_range_from_keys] - from google.cloud.bigtable import Client - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -723,8 +723,8 @@ def test_bigtable_add_row_range_with_prefix(): Config.TABLE.mutate_rows(rows) # [START bigtable_api_add_row_range_with_prefix] - from google.cloud.bigtable import Client - from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -747,7 +747,7 @@ def test_bigtable_add_row_range_with_prefix(): def test_bigtable_batcher_mutate_flush_mutate_rows(): # [START bigtable_api_batcher_mutate] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -769,7 +769,7 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): # [END bigtable_api_batcher_mutate] # [START bigtable_api_batcher_flush] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -795,7 +795,7 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): table.truncate(timeout=200) # [START bigtable_api_batcher_mutate_rows] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -829,8 +829,8 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): def test_bigtable_create_family_gc_max_age(): # [START bigtable_api_create_family_gc_max_age] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -851,8 +851,8 @@ def test_bigtable_create_family_gc_max_age(): def test_bigtable_create_family_gc_max_versions(): # [START bigtable_api_create_family_gc_max_versions] - 
from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -872,8 +872,8 @@ def test_bigtable_create_family_gc_max_versions(): def test_bigtable_create_family_gc_union(): # [START bigtable_api_create_family_gc_union] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -898,8 +898,8 @@ def test_bigtable_create_family_gc_union(): def test_bigtable_create_family_gc_intersection(): # [START bigtable_api_create_family_gc_intersection] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -927,8 +927,8 @@ def test_bigtable_create_family_gc_intersection(): def test_bigtable_create_family_gc_nested(): # [START bigtable_api_create_family_gc_nested] - from google.cloud.bigtable import Client - from google.cloud.bigtable import column_family + from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable.deprecated import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -978,7 +978,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): row.commit() # [START bigtable_api_row_data_cells] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -993,7 +993,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): assert actual_cell_value == value # [START bigtable_api_row_cell_value] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1006,7 +1006,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): assert cell_value == value # [START bigtable_api_row_cell_values] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1025,7 +1025,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): row.commit() # [START bigtable_api_row_find_cells] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1042,7 +1042,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): def test_bigtable_row_setcell_rowkey(): # [START bigtable_api_row_set_cell] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1061,7 +1061,7 @@ def test_bigtable_row_setcell_rowkey(): assert status.code == 0 # [START bigtable_api_row_row_key] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1073,7 +1073,7 @@ def test_bigtable_row_setcell_rowkey(): assert row_key == ROW_KEY1 # [START bigtable_api_row_table] - from 
google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1098,7 +1098,7 @@ def test_bigtable_row_delete(): assert written_row_keys == [b"row_key_1"] # [START bigtable_api_row_delete] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1130,7 +1130,7 @@ def test_bigtable_row_delete_cell(): assert written_row_keys == [row_key1] # [START bigtable_api_row_delete_cell] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1163,7 +1163,7 @@ def test_bigtable_row_delete_cells(): assert written_row_keys == [row_key1] # [START bigtable_api_row_delete_cells] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1189,7 +1189,7 @@ def test_bigtable_row_clear(): assert mutation_size > 0 # [START bigtable_api_row_clear] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1208,7 +1208,7 @@ def test_bigtable_row_clear(): def test_bigtable_row_clear_get_mutations_size(): # [START bigtable_api_row_get_mutations_size] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1230,7 +1230,7 @@ def test_bigtable_row_clear_get_mutations_size(): def test_bigtable_row_setcell_commit_rowkey(): # [START bigtable_api_row_set_cell] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1244,7 +1244,7 @@ def test_bigtable_row_setcell_commit_rowkey(): row_obj.commit() # [START bigtable_api_row_commit] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1264,7 +1264,7 @@ def test_bigtable_row_setcell_commit_rowkey(): assert written_row_keys == [b"row_key_1", b"row_key_2"] # [START bigtable_api_row_row_key] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1286,7 +1286,7 @@ def test_bigtable_row_append_cell_value(): row.commit() # [START bigtable_api_row_append_cell_value] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1303,7 +1303,7 @@ def test_bigtable_row_append_cell_value(): assert actual_value == cell_val1 + cell_val2 # [START bigtable_api_row_commit] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1315,7 +1315,7 @@ def test_bigtable_row_append_cell_value(): # [END bigtable_api_row_commit] # [START bigtable_api_row_increment_cell_value] - from google.cloud.bigtable import Client + from google.cloud.bigtable.deprecated import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) From 79c82c3f2b41c4fa812dd8110ac5b8449fd5dfb2 Mon Sep 17 00:00:00 2001 
From: Daniel Sanche Date: Wed, 8 Mar 2023 14:53:19 -0800 Subject: [PATCH 035/349] updated docs --- docs/app-profile.rst | 2 +- docs/backup.rst | 2 +- docs/client-intro.rst | 18 ++++----- docs/client.rst | 2 +- docs/cluster.rst | 2 +- docs/column-family.rst | 22 +++++------ docs/data-api.rst | 82 ++++++++++++++++++++-------------------- docs/encryption-info.rst | 2 +- docs/instance-api.rst | 32 ++++++++-------- docs/instance.rst | 2 +- docs/row-data.rst | 2 +- docs/row-filters.rst | 12 +++--- docs/row-set.rst | 2 +- docs/row.rst | 2 +- docs/table-api.rst | 40 ++++++++++---------- docs/table.rst | 2 +- docs/usage.rst | 16 ++++---- 17 files changed, 121 insertions(+), 121 deletions(-) diff --git a/docs/app-profile.rst b/docs/app-profile.rst index 5c9d426c2..50e57c179 100644 --- a/docs/app-profile.rst +++ b/docs/app-profile.rst @@ -1,6 +1,6 @@ App Profile ~~~~~~~~~~~ -.. automodule:: google.cloud.bigtable.app_profile +.. automodule:: google.cloud.bigtable.deprecated.app_profile :members: :show-inheritance: diff --git a/docs/backup.rst b/docs/backup.rst index e75abd431..46c32c91b 100644 --- a/docs/backup.rst +++ b/docs/backup.rst @@ -1,6 +1,6 @@ Backup ~~~~~~~~ -.. automodule:: google.cloud.bigtable.backup +.. automodule:: google.cloud.bigtable.deprecated.backup :members: :show-inheritance: diff --git a/docs/client-intro.rst b/docs/client-intro.rst index 242068499..d75cf5f96 100644 --- a/docs/client-intro.rst +++ b/docs/client-intro.rst @@ -1,21 +1,21 @@ Base for Everything =================== -To use the API, the :class:`Client ` +To use the API, the :class:`Client ` class defines a high-level interface which handles authorization and creating other objects: .. code:: python - from google.cloud.bigtable.client import Client + from google.cloud.bigtable.deprecated.client import Client client = Client() Long-lived Defaults ------------------- -When creating a :class:`Client `, the +When creating a :class:`Client `, the ``user_agent`` argument has sensible a default -(:data:`DEFAULT_USER_AGENT `). +(:data:`DEFAULT_USER_AGENT `). However, you may over-ride it and the value will be used throughout all API requests made with the ``client`` you create. @@ -38,14 +38,14 @@ Configuration .. code:: - >>> from google.cloud import bigtable + >>> import google.cloud.deprecated as bigtable >>> client = bigtable.Client() or pass in ``credentials`` and ``project`` explicitly .. code:: - >>> from google.cloud import bigtable + >>> import google.cloud.deprecated as bigtable >>> client = bigtable.Client(project='my-project', credentials=creds) .. tip:: @@ -73,15 +73,15 @@ you can pass the ``read_only`` argument: client = bigtable.Client(read_only=True) This will ensure that the -:data:`READ_ONLY_SCOPE ` is used +:data:`READ_ONLY_SCOPE ` is used for API requests (so any accidental requests that would modify data will fail). Next Step --------- -After a :class:`Client `, the next highest-level -object is an :class:`Instance `. You'll need +After a :class:`Client `, the next highest-level +object is an :class:`Instance `. You'll need one before you can interact with tables or data. Head next to learn about the :doc:`instance-api`. diff --git a/docs/client.rst b/docs/client.rst index c48595c8a..df92a9861 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -1,6 +1,6 @@ Client ~~~~~~ -.. automodule:: google.cloud.bigtable.client +.. 
automodule:: google.cloud.bigtable.deprecated.client :members: :show-inheritance: diff --git a/docs/cluster.rst b/docs/cluster.rst index ad33aae5e..9747b226f 100644 --- a/docs/cluster.rst +++ b/docs/cluster.rst @@ -1,6 +1,6 @@ Cluster ~~~~~~~ -.. automodule:: google.cloud.bigtable.cluster +.. automodule:: google.cloud.bigtable.deprecated.cluster :members: :show-inheritance: diff --git a/docs/column-family.rst b/docs/column-family.rst index de6c1eb1f..39095000d 100644 --- a/docs/column-family.rst +++ b/docs/column-family.rst @@ -2,7 +2,7 @@ Column Families =============== When creating a -:class:`ColumnFamily `, it is +:class:`ColumnFamily `, it is possible to set garbage collection rules for expired data. By setting a rule, cells in the table matching the rule will be deleted @@ -10,19 +10,19 @@ during periodic garbage collection (which executes opportunistically in the background). The types -:class:`MaxAgeGCRule `, -:class:`MaxVersionsGCRule `, -:class:`GarbageCollectionRuleUnion ` and -:class:`GarbageCollectionRuleIntersection ` +:class:`MaxAgeGCRule `, +:class:`MaxVersionsGCRule `, +:class:`GarbageCollectionRuleUnion ` and +:class:`GarbageCollectionRuleIntersection ` can all be used as the optional ``gc_rule`` argument in the -:class:`ColumnFamily ` +:class:`ColumnFamily ` constructor. This value is then used in the -:meth:`create() ` and -:meth:`update() ` methods. +:meth:`create() ` and +:meth:`update() ` methods. These rules can be nested arbitrarily, with a -:class:`MaxAgeGCRule ` or -:class:`MaxVersionsGCRule ` +:class:`MaxAgeGCRule ` or +:class:`MaxVersionsGCRule ` at the lowest level of the nesting: .. code:: python @@ -44,6 +44,6 @@ at the lowest level of the nesting: ---- -.. automodule:: google.cloud.bigtable.column_family +.. automodule:: google.cloud.bigtable.deprecated.column_family :members: :show-inheritance: diff --git a/docs/data-api.rst b/docs/data-api.rst index 01a49178f..e68835d1a 100644 --- a/docs/data-api.rst +++ b/docs/data-api.rst @@ -1,7 +1,7 @@ Data API ======== -After creating a :class:`Table ` and some +After creating a :class:`Table ` and some column families, you are ready to store and retrieve data. Cells vs. Columns vs. Column Families @@ -27,7 +27,7 @@ Modifying Data Since data is stored in cells, which are stored in rows, we use the metaphor of a **row** in classes that are used to modify (write, update, delete) data in a -:class:`Table `. +:class:`Table `. Direct vs. Conditional vs. Append --------------------------------- @@ -38,26 +38,26 @@ methods. * The **direct** way is via `MutateRow`_ which involves simply adding, overwriting or deleting cells. The - :class:`DirectRow ` class + :class:`DirectRow ` class handles direct mutations. * The **conditional** way is via `CheckAndMutateRow`_. This method first checks if some filter is matched in a given row, then applies one of two sets of mutations, depending on if a match occurred or not. (These mutation sets are called the "true mutations" and "false mutations".) The - :class:`ConditionalRow ` class + :class:`ConditionalRow ` class handles conditional mutations. * The **append** way is via `ReadModifyWriteRow`_. This simply appends (as bytes) or increments (as an integer) data in a presumed existing cell in a row. The - :class:`AppendRow ` class + :class:`AppendRow ` class handles append mutations. Row Factory ----------- A single factory can be used to create any of the three row types. -To create a :class:`DirectRow `: +To create a :class:`DirectRow `: .. 
code:: python @@ -66,15 +66,15 @@ To create a :class:`DirectRow `: Unlike the previous string values we've used before, the row key must be ``bytes``. -To create a :class:`ConditionalRow `, -first create a :class:`RowFilter ` and +To create a :class:`ConditionalRow `, +first create a :class:`RowFilter ` and then .. code:: python cond_row = table.row(row_key, filter_=filter_) -To create an :class:`AppendRow ` +To create an :class:`AppendRow ` .. code:: python @@ -95,7 +95,7 @@ Direct Mutations Direct mutations can be added via one of four methods -* :meth:`set_cell() ` allows a +* :meth:`set_cell() ` allows a single value to be written to a column .. code:: python @@ -109,7 +109,7 @@ Direct mutations can be added via one of four methods The value can either be bytes or an integer, which will be converted to bytes as a signed 64-bit integer. -* :meth:`delete_cell() ` deletes +* :meth:`delete_cell() ` deletes all cells (i.e. for all timestamps) in a given column .. code:: python @@ -119,7 +119,7 @@ Direct mutations can be added via one of four methods Remember, this only happens in the ``row`` we are using. If we only want to delete cells from a limited range of time, a - :class:`TimestampRange ` can + :class:`TimestampRange ` can be used .. code:: python @@ -127,9 +127,9 @@ Direct mutations can be added via one of four methods row.delete_cell(column_family_id, column, time_range=time_range) -* :meth:`delete_cells() ` does +* :meth:`delete_cells() ` does the same thing as - :meth:`delete_cell() `, + :meth:`delete_cell() `, but accepts a list of columns in a column family rather than a single one. .. code:: python @@ -138,7 +138,7 @@ Direct mutations can be added via one of four methods time_range=time_range) In addition, if we want to delete cells from every column in a column family, - the special :attr:`ALL_COLUMNS ` + the special :attr:`ALL_COLUMNS ` value can be used .. code:: python @@ -146,7 +146,7 @@ Direct mutations can be added via one of four methods row.delete_cells(column_family_id, row.ALL_COLUMNS, time_range=time_range) -* :meth:`delete() ` will delete the +* :meth:`delete() ` will delete the entire row .. code:: python @@ -177,14 +177,14 @@ Append Mutations Append mutations can be added via one of two methods -* :meth:`append_cell_value() ` +* :meth:`append_cell_value() ` appends a bytes value to an existing cell: .. code:: python append_row.append_cell_value(column_family_id, column, bytes_value) -* :meth:`increment_cell_value() ` +* :meth:`increment_cell_value() ` increments an integer value in an existing cell: .. code:: python @@ -217,7 +217,7 @@ Read Single Row from a Table ---------------------------- To make a `ReadRows`_ API request for a single row key, use -:meth:`Table.read_row() `: +:meth:`Table.read_row() `: .. code:: python @@ -226,34 +226,34 @@ To make a `ReadRows`_ API request for a single row key, use { u'fam1': { b'col1': [ - , - , + , + , ], b'col2': [ - , + , ], }, u'fam2': { b'col3': [ - , - , - , + , + , + , ], }, } >>> cell = row_data.cells[u'fam1'][b'col1'][0] >>> cell - + >>> cell.value b'val1' >>> cell.timestamp datetime.datetime(2016, 2, 27, 3, 41, 18, 122823, tzinfo=) -Rather than returning a :class:`DirectRow ` +Rather than returning a :class:`DirectRow ` or similar class, this method returns a -:class:`PartialRowData ` +:class:`PartialRowData ` instance. This class is used for reading and parsing data rather than for -modifying data (as :class:`DirectRow ` is). +modifying data (as :class:`DirectRow ` is). 
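If the requested row does not exist, ``read_row()`` returns :data:`None`, so callers usually guard before parsing the result. A minimal sketch (reusing ``table``, ``row_key`` and the ``fam1`` / ``col1`` names from the example above):

.. code:: python

    row_data = table.read_row(row_key)
    if row_data is not None:
        # single cell value (bytes) for a family/column pair
        value = row_data.cell_value(u'fam1', b'col1')
        # or walk every cell, as shown in the mapping above
        for column, cells in row_data.cells[u'fam1'].items():
            for cell in cells:
                print(column, cell.value, cell.timestamp)
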
A filter can also be applied to the results: @@ -262,15 +262,15 @@ A filter can also be applied to the results: row_data = table.read_row(row_key, filter_=filter_val) The allowable ``filter_`` values are the same as those used for a -:class:`ConditionalRow `. For +:class:`ConditionalRow `. For more information, see the -:meth:`Table.read_row() ` documentation. +:meth:`Table.read_row() ` documentation. Stream Many Rows from a Table ----------------------------- To make a `ReadRows`_ API request for a stream of rows, use -:meth:`Table.read_rows() `: +:meth:`Table.read_rows() `: .. code:: python @@ -279,32 +279,32 @@ To make a `ReadRows`_ API request for a stream of rows, use Using gRPC over HTTP/2, a continual stream of responses will be delivered. In particular -* :meth:`consume_next() ` +* :meth:`consume_next() ` pulls the next result from the stream, parses it and stores it on the - :class:`PartialRowsData ` instance -* :meth:`consume_all() ` + :class:`PartialRowsData ` instance +* :meth:`consume_all() ` pulls results from the stream until there are no more -* :meth:`cancel() ` closes +* :meth:`cancel() ` closes the stream -See the :class:`PartialRowsData ` +See the :class:`PartialRowsData ` documentation for more information. As with -:meth:`Table.read_row() `, an optional +:meth:`Table.read_row() `, an optional ``filter_`` can be applied. In addition a ``start_key`` and / or ``end_key`` can be supplied for the stream, a ``limit`` can be set and a boolean ``allow_row_interleaving`` can be specified to allow faster streamed results at the potential cost of non-sequential reads. -See the :meth:`Table.read_rows() ` +See the :meth:`Table.read_rows() ` documentation for more information on the optional arguments. Sample Keys in a Table ---------------------- Make a `SampleRowKeys`_ API request with -:meth:`Table.sample_row_keys() `: +:meth:`Table.sample_row_keys() `: .. code:: python @@ -315,7 +315,7 @@ approximately equal size, which can be used to break up the data for distributed tasks like mapreduces. As with -:meth:`Table.read_rows() `, the +:meth:`Table.read_rows() `, the returned ``keys_iterator`` is connected to a cancellable HTTP/2 stream. The next key in the result can be accessed via diff --git a/docs/encryption-info.rst b/docs/encryption-info.rst index 46f19880f..62b77ea0c 100644 --- a/docs/encryption-info.rst +++ b/docs/encryption-info.rst @@ -1,6 +1,6 @@ Encryption Info ~~~~~~~~~~~~~~~ -.. automodule:: google.cloud.bigtable.encryption_info +.. automodule:: google.cloud.bigtable.deprecated.encryption_info :members: :show-inheritance: diff --git a/docs/instance-api.rst b/docs/instance-api.rst index 88b4eb4dc..78123e8ca 100644 --- a/docs/instance-api.rst +++ b/docs/instance-api.rst @@ -1,7 +1,7 @@ Instance Admin API ================== -After creating a :class:`Client `, you can +After creating a :class:`Client `, you can interact with individual instances for a project. List Instances @@ -9,7 +9,7 @@ List Instances If you want a comprehensive list of all existing instances, make a `ListInstances`_ API request with -:meth:`Client.list_instances() `: +:meth:`Client.list_instances() `: .. code:: python @@ -18,7 +18,7 @@ If you want a comprehensive list of all existing instances, make a Instance Factory ---------------- -To create an :class:`Instance ` object: +To create an :class:`Instance ` object: .. 
code:: python @@ -40,7 +40,7 @@ Create a new Instance --------------------- After creating the instance object, make a `CreateInstance`_ API request -with :meth:`create() `: +with :meth:`create() `: .. code:: python @@ -54,14 +54,14 @@ Check on Current Operation When modifying an instance (via a `CreateInstance`_ request), the Bigtable API will return a `long-running operation`_ and a corresponding - :class:`Operation ` object + :class:`Operation ` object will be returned by - :meth:`create() `. + :meth:`create() `. You can check if a long-running operation (for a -:meth:`create() ` has finished +:meth:`create() ` has finished by making a `GetOperation`_ request with -:meth:`Operation.finished() `: +:meth:`Operation.finished() `: .. code:: python @@ -71,18 +71,18 @@ by making a `GetOperation`_ request with .. note:: - Once an :class:`Operation ` object + Once an :class:`Operation ` object has returned :data:`True` from - :meth:`finished() `, the + :meth:`finished() `, the object should not be re-used. Subsequent calls to - :meth:`finished() ` + :meth:`finished() ` will result in a :class:`ValueError `. Get metadata for an existing Instance ------------------------------------- After creating the instance object, make a `GetInstance`_ API request -with :meth:`reload() `: +with :meth:`reload() `: .. code:: python @@ -94,7 +94,7 @@ Update an existing Instance --------------------------- After creating the instance object, make an `UpdateInstance`_ API request -with :meth:`update() `: +with :meth:`update() `: .. code:: python @@ -105,7 +105,7 @@ Delete an existing Instance --------------------------- Make a `DeleteInstance`_ API request with -:meth:`delete() `: +:meth:`delete() `: .. code:: python @@ -115,8 +115,8 @@ Next Step --------- Now we go down the hierarchy from -:class:`Instance ` to a -:class:`Table `. +:class:`Instance ` to a +:class:`Table `. Head next to learn about the :doc:`table-api`. diff --git a/docs/instance.rst b/docs/instance.rst index f9be9672f..3a61faf1c 100644 --- a/docs/instance.rst +++ b/docs/instance.rst @@ -1,6 +1,6 @@ Instance ~~~~~~~~ -.. automodule:: google.cloud.bigtable.instance +.. automodule:: google.cloud.bigtable.deprecated.instance :members: :show-inheritance: diff --git a/docs/row-data.rst b/docs/row-data.rst index 503f9b1cb..b9013ebf5 100644 --- a/docs/row-data.rst +++ b/docs/row-data.rst @@ -1,6 +1,6 @@ Row Data ~~~~~~~~ -.. automodule:: google.cloud.bigtable.row_data +.. automodule:: google.cloud.bigtable.deprecated.row_data :members: :show-inheritance: diff --git a/docs/row-filters.rst b/docs/row-filters.rst index 9884ce400..8d1fac46b 100644 --- a/docs/row-filters.rst +++ b/docs/row-filters.rst @@ -2,11 +2,11 @@ Bigtable Row Filters ==================== It is possible to use a -:class:`RowFilter ` +:class:`RowFilter ` when adding mutations to a -:class:`ConditionalRow ` and when -reading row data with :meth:`read_row() ` -or :meth:`read_rows() `. +:class:`ConditionalRow ` and when +reading row data with :meth:`read_row() ` +or :meth:`read_rows() `. As laid out in the `RowFilter definition`_, the following basic filters are provided: @@ -60,8 +60,8 @@ level. For example: ---- -.. automodule:: google.cloud.bigtable.row_filters +.. automodule:: google.cloud.bigtable.deprecated.row_filters :members: :show-inheritance: -.. _RowFilter definition: https://googleapis.dev/python/bigtable/latest/row-filters.html?highlight=rowfilter#google.cloud.bigtable.row_filters.RowFilter +.. 
_RowFilter definition: https://googleapis.dev/python/bigtable/latest/row-filters.html?highlight=rowfilter#google.cloud.bigtable.deprecated.row_filters.RowFilter diff --git a/docs/row-set.rst b/docs/row-set.rst index 5f7a16a02..92cd107e8 100644 --- a/docs/row-set.rst +++ b/docs/row-set.rst @@ -1,6 +1,6 @@ Row Set ~~~~~~~~ -.. automodule:: google.cloud.bigtable.row_set +.. automodule:: google.cloud.bigtable.deprecated.row_set :members: :show-inheritance: diff --git a/docs/row.rst b/docs/row.rst index 33686608b..e8fa48cdd 100644 --- a/docs/row.rst +++ b/docs/row.rst @@ -1,7 +1,7 @@ Bigtable Row ============ -.. automodule:: google.cloud.bigtable.row +.. automodule:: google.cloud.bigtable.deprecated.row :members: :show-inheritance: :inherited-members: diff --git a/docs/table-api.rst b/docs/table-api.rst index 1bbf85146..ce05a3419 100644 --- a/docs/table-api.rst +++ b/docs/table-api.rst @@ -1,7 +1,7 @@ Table Admin API =============== -After creating an :class:`Instance `, you can +After creating an :class:`Instance `, you can interact with individual tables, groups of tables or column families within a table. @@ -10,33 +10,33 @@ List Tables If you want a comprehensive list of all existing tables in a instance, make a `ListTables`_ API request with -:meth:`Instance.list_tables() `: +:meth:`Instance.list_tables() `: .. code:: python >>> instance.list_tables() - [, - ] + [, + ] Table Factory ------------- -To create a :class:`Table ` object: +To create a :class:`Table ` object: .. code:: python table = instance.table(table_id) -Even if this :class:`Table ` already +Even if this :class:`Table ` already has been created with the API, you'll want this object to use as a -parent of a :class:`ColumnFamily ` -or :class:`Row `. +parent of a :class:`ColumnFamily ` +or :class:`Row `. Create a new Table ------------------ After creating the table object, make a `CreateTable`_ API request -with :meth:`create() `: +with :meth:`create() `: .. code:: python @@ -53,7 +53,7 @@ Delete an existing Table ------------------------ Make a `DeleteTable`_ API request with -:meth:`delete() `: +:meth:`delete() `: .. code:: python @@ -67,7 +67,7 @@ associated with a table, the `GetTable`_ API method returns a table object with the names of the column families. To retrieve the list of column families use -:meth:`list_column_families() `: +:meth:`list_column_families() `: .. code:: python @@ -77,7 +77,7 @@ Column Family Factory --------------------- To create a -:class:`ColumnFamily ` object: +:class:`ColumnFamily ` object: .. code:: python @@ -87,7 +87,7 @@ There is no real reason to use this factory unless you intend to create or delete a column family. In addition, you can specify an optional ``gc_rule`` (a -:class:`GarbageCollectionRule ` +:class:`GarbageCollectionRule ` or similar): .. code:: python @@ -99,7 +99,7 @@ This rule helps the backend determine when and how to clean up old cells in the column family. See :doc:`column-family` for more information about -:class:`GarbageCollectionRule ` +:class:`GarbageCollectionRule ` and related classes. Create a new Column Family @@ -107,7 +107,7 @@ Create a new Column Family After creating the column family object, make a `CreateColumnFamily`_ API request with -:meth:`ColumnFamily.create() ` +:meth:`ColumnFamily.create() ` .. code:: python @@ -117,7 +117,7 @@ Delete an existing Column Family -------------------------------- Make a `DeleteColumnFamily`_ API request with -:meth:`ColumnFamily.delete() ` +:meth:`ColumnFamily.delete() ` .. 
code:: python @@ -127,7 +127,7 @@ Update an existing Column Family -------------------------------- Make an `UpdateColumnFamily`_ API request with -:meth:`ColumnFamily.delete() ` +:meth:`ColumnFamily.delete() ` .. code:: python @@ -137,9 +137,9 @@ Next Step --------- Now we go down the final step of the hierarchy from -:class:`Table ` to -:class:`Row ` as well as streaming -data directly via a :class:`Table `. +:class:`Table ` to +:class:`Row ` as well as streaming +data directly via a :class:`Table `. Head next to learn about the :doc:`data-api`. diff --git a/docs/table.rst b/docs/table.rst index c230725d1..0d938e0af 100644 --- a/docs/table.rst +++ b/docs/table.rst @@ -1,6 +1,6 @@ Table ~~~~~ -.. automodule:: google.cloud.bigtable.table +.. automodule:: google.cloud.bigtable.deprecated.table :members: :show-inheritance: diff --git a/docs/usage.rst b/docs/usage.rst index 33bf7bb7f..80fb65898 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -21,12 +21,12 @@ Using the API In the hierarchy of API concepts -* a :class:`Client ` owns an - :class:`Instance ` -* an :class:`Instance ` owns a - :class:`Table ` -* a :class:`Table ` owns a - :class:`ColumnFamily ` -* a :class:`Table ` owns a - :class:`Row ` +* a :class:`Client ` owns an + :class:`Instance ` +* an :class:`Instance ` owns a + :class:`Table ` +* a :class:`Table ` owns a + :class:`ColumnFamily ` +* a :class:`Table ` owns a + :class:`Row ` (and all the cells in the row) From f3b7fbd99bc88bff4f919a5d9483581b50e796a7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 15:10:38 -0800 Subject: [PATCH 036/349] disabled coverage for skeleton code --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 3128ad99e..9d5663c11 100644 --- a/.coveragerc +++ b/.coveragerc @@ -41,3 +41,4 @@ omit = */core/*.py */site-packages/*.py google/cloud/__init__.py + google/cloud/bigtable/*.py # TODO: remove after terst implementation From aa37a3104188c8a5a60023b73c84cad40b039780 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 15:24:22 -0800 Subject: [PATCH 037/349] fixed cover change --- .coveragerc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 9d5663c11..f6d9ec6f2 100644 --- a/.coveragerc +++ b/.coveragerc @@ -41,4 +41,5 @@ omit = */core/*.py */site-packages/*.py google/cloud/__init__.py - google/cloud/bigtable/*.py # TODO: remove after terst implementation + # TODO: remove after terst implementation + google/cloud/bigtable/*.py From 17d731f1a14cd76590e101de31130a4f8fb626b7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 16:39:13 -0800 Subject: [PATCH 038/349] adjusted coverage setup --- .coveragerc | 4 +--- google/cloud/bigtable/client.py | 4 ++-- google/cloud/bigtable/exceptions.py | 8 ++------ google/cloud/bigtable/mutations_batcher.py | 16 ++++++++-------- owlbot.py | 2 +- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/.coveragerc b/.coveragerc index f6d9ec6f2..702b85681 100644 --- a/.coveragerc +++ b/.coveragerc @@ -24,7 +24,7 @@ omit = google/cloud/bigtable_admin/gapic_version.py [report] -fail_under = 100 +fail_under = 99 show_missing = True exclude_lines = # Re-enable the standard pragma @@ -41,5 +41,3 @@ omit = */core/*.py */site-packages/*.py google/cloud/__init__.py - # TODO: remove after terst implementation - google/cloud/bigtable/*.py diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 184c51c2e..df4bf308f 100644 --- a/google/cloud/bigtable/client.py 
+++ b/google/cloud/bigtable/client.py @@ -63,7 +63,7 @@ def __init__( on the client. API Endpoint should be set through client_options. metadata: a list of metadata headers to be attached to all calls with this client """ - pass + raise NotImplementedError def get_table( self, instance_id: str, table_id: str, app_profile_id: str | None = None @@ -77,7 +77,7 @@ def get_table( app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles """ - return Table(self, instance_id, table_id, app_profile_id) + raise NotImplementedError class Table: diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 6974ab55d..2f9bf399a 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -29,18 +29,14 @@ class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # t """ def __init__(self, message, excs): - if is_311_plus: - super().__init__(message, excs) - else: - super().__init__(message) - self.exceptions = excs + raise NotImplementedError() + class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation """ - pass diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 9837124df..3c6cfc3d1 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -22,7 +22,7 @@ from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: - from google.cloud.bigtable.client import Table + from google.cloud.bigtable.client import Table # pragma: no cover class MutationsBatcher: @@ -58,13 +58,13 @@ def __init__( flush_interval: int = 5, metadata: list[tuple[str, str]] | None = None, ): - pass + raise NotImplementedError async def append(self, row_key: str | bytes, mutation: Mutation | list[Mutation]): """ Add a new mutation to the internal queue """ - pass + raise NotImplementedError async def append_conditional( self, @@ -78,7 +78,7 @@ async def append_conditional( Calls check_and_mutate_row internally on flush """ - pass + raise NotImplementedError async def flush(self): """ @@ -87,18 +87,18 @@ async def flush(self): Raises: - MutationsExceptionGroup if any mutation in the batch fails """ - pass + raise NotImplementedError async def __aenter__(self): """For context manager API""" - pass + raise NotImplementedError async def __aexit__(self, exc_type, exc, tb): """For context manager API""" - pass + raise NotImplementedError async def close(self): """ Flush queue and clean up resources """ - pass + raise NotImplementedError diff --git a/owlbot.py b/owlbot.py index 92ceb17a5..d7eb3eaf2 100644 --- a/owlbot.py +++ b/owlbot.py @@ -89,7 +89,7 @@ def get_staging_dirs( samples=True, # set to True only if there are samples split_system_tests=True, microgenerator=True, - cov_level=100, + cov_level=99, ) s.move(templated_files, excludes=[".coveragerc", "README.rst", ".github/release-please.yml"]) From a0a5c577a53e8d76c1a9930f55a371a124c5f1ca Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 16:39:46 -0800 Subject: [PATCH 039/349] ran blacken --- google/cloud/bigtable/exceptions.py | 2 +- google/cloud/bigtable/mutations_batcher.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 2f9bf399a..86bfe9247 100644 --- a/google/cloud/bigtable/exceptions.py +++ 
b/google/cloud/bigtable/exceptions.py @@ -32,11 +32,11 @@ def __init__(self, message, excs): raise NotImplementedError() - class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation """ + pass diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 3c6cfc3d1..2e393cc7e 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -22,7 +22,7 @@ from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: - from google.cloud.bigtable.client import Table # pragma: no cover + from google.cloud.bigtable.client import Table # pragma: no cover class MutationsBatcher: From 64a05d834a3940cf37ea5b94be2011cf6da0e6cb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 16:45:58 -0800 Subject: [PATCH 040/349] changed cover value in noxfile --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 688262f6f..8ce6d5d95 100644 --- a/noxfile.py +++ b/noxfile.py @@ -318,7 +318,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=100") + session.run("coverage", "report", "--show-missing", "--fail-under=99") session.run("coverage", "erase") From 741147dee416c6d644a44e6332693511e2918498 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 17:34:13 -0800 Subject: [PATCH 041/349] updated fork --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index e50970aa7..4186187f4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "gapic-generator-fork"] path = gapic-generator-fork - url = git@github.com:daniel-sanche/gapic-generator-python.git + url = git@github.com:googleapis/gapic-generator-python.git From 005900cde0dadaed5accc0cb98c5cf57820ef7b3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 8 Mar 2023 17:35:00 -0800 Subject: [PATCH 042/349] added pool transport to tests --- gapic-generator-fork | 2 +- tests/unit/gapic/bigtable_v2/test_bigtable.py | 573 +++++++++++++++++- 2 files changed, 567 insertions(+), 8 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 03fa6f3c5..099f88e34 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 03fa6f3c5ebac267ba97ecd9616f146bed1f1c4b +Subproject commit 099f88e3492f87a63ae26e1cc213ef0711018c4e diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index e52fc1aba..08a29bec5 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -44,6 +44,8 @@ from google.cloud.bigtable_v2.types import data from google.cloud.bigtable_v2.types import request_stats from google.oauth2 import service_account +from google.protobuf import duration_pb2 # type: ignore +from google.protobuf import timestamp_pb2 # type: ignore import google.auth @@ -91,6 +93,7 @@ def test__get_default_mtls_endpoint(): [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), + (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_from_service_account_info(client_class, transport_name): @@ -112,6 +115,7 @@ def test_bigtable_client_from_service_account_info(client_class, transport_name) [ (transports.BigtableGrpcTransport, 
"grpc"), (transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), + (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_service_account_always_use_jwt( @@ -137,6 +141,7 @@ def test_bigtable_client_service_account_always_use_jwt( [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), + (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_from_service_account_file(client_class, transport_name): @@ -176,6 +181,11 @@ def test_bigtable_client_get_transport_class(): [ (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), + ( + PooledBigtableAsyncClient, + transports.PooledBigtableGrpcAsyncIOTransport, + "pooled_grpc_asyncio", + ), ], ) @mock.patch.object( @@ -310,6 +320,12 @@ def test_bigtable_client_client_options(client_class, transport_class, transport "grpc_asyncio", "true", ), + ( + PooledBigtableAsyncClient, + transports.PooledBigtableGrpcAsyncIOTransport, + "pooled_grpc_asyncio", + "true", + ), (BigtableClient, transports.BigtableGrpcTransport, "grpc", "false"), ( BigtableAsyncClient, @@ -317,6 +333,12 @@ def test_bigtable_client_client_options(client_class, transport_class, transport "grpc_asyncio", "false", ), + ( + PooledBigtableAsyncClient, + transports.PooledBigtableGrpcAsyncIOTransport, + "pooled_grpc_asyncio", + "false", + ), ], ) @mock.patch.object( @@ -424,7 +446,9 @@ def test_bigtable_client_mtls_env_auto( ) -@pytest.mark.parametrize("client_class", [BigtableClient, BigtableAsyncClient]) +@pytest.mark.parametrize( + "client_class", [BigtableClient, BigtableAsyncClient, PooledBigtableAsyncClient] +) @mock.patch.object( BigtableClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigtableClient) ) @@ -506,6 +530,11 @@ def test_bigtable_client_get_mtls_endpoint_and_cert_source(client_class): [ (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), + ( + PooledBigtableAsyncClient, + transports.PooledBigtableGrpcAsyncIOTransport, + "pooled_grpc_asyncio", + ), ], ) def test_bigtable_client_client_options_scopes( @@ -541,6 +570,12 @@ def test_bigtable_client_client_options_scopes( "grpc_asyncio", grpc_helpers_async, ), + ( + PooledBigtableAsyncClient, + transports.PooledBigtableGrpcAsyncIOTransport, + "grpc_asyncio", + grpc_helpers_async, + ), ], ) def test_bigtable_client_client_options_credentials_file( @@ -2434,6 +2469,523 @@ async def test_read_modify_write_row_flattened_error_async(): ) +@pytest.mark.parametrize( + "request_type", + [ + bigtable.GenerateInitialChangeStreamPartitionsRequest, + dict, + ], +) +def test_generate_initial_change_stream_partitions( + request_type, transport: str = "grpc" +): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + # Designate an appropriate return value for the call. 
+ call.return_value = iter( + [bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + response = client.generate_initial_change_stream_partitions(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.GenerateInitialChangeStreamPartitionsRequest() + + # Establish that the response is the type that we expect. + for message in response: + assert isinstance( + message, bigtable.GenerateInitialChangeStreamPartitionsResponse + ) + + +def test_generate_initial_change_stream_partitions_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + client.generate_initial_change_stream_partitions() + call.assert_called() + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.GenerateInitialChangeStreamPartitionsRequest() + + +@pytest.mark.asyncio +async def test_generate_initial_change_stream_partitions_async( + transport: str = "grpc_asyncio", + request_type=bigtable.GenerateInitialChangeStreamPartitionsRequest, +): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + response = await client.generate_initial_change_stream_partitions(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.GenerateInitialChangeStreamPartitionsRequest() + + # Establish that the response is the type that we expect. + message = await response.read() + assert isinstance(message, bigtable.GenerateInitialChangeStreamPartitionsResponse) + + +@pytest.mark.asyncio +async def test_generate_initial_change_stream_partitions_async_from_dict(): + await test_generate_initial_change_stream_partitions_async(request_type=dict) + + +def test_generate_initial_change_stream_partitions_field_headers(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = bigtable.GenerateInitialChangeStreamPartitionsRequest() + + request.table_name = "table_name_value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + call.return_value = iter( + [bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + client.generate_initial_change_stream_partitions(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "table_name=table_name_value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_generate_initial_change_stream_partitions_field_headers_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = bigtable.GenerateInitialChangeStreamPartitionsRequest() + + request.table_name = "table_name_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + await client.generate_initial_change_stream_partitions(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "table_name=table_name_value", + ) in kw["metadata"] + + +def test_generate_initial_change_stream_partitions_flattened(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter( + [bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.generate_initial_change_stream_partitions( + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + arg = args[0].table_name + mock_val = "table_name_value" + assert arg == mock_val + arg = args[0].app_profile_id + mock_val = "app_profile_id_value" + assert arg == mock_val + + +def test_generate_initial_change_stream_partitions_flattened_error(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. 
+ with pytest.raises(ValueError): + client.generate_initial_change_stream_partitions( + bigtable.GenerateInitialChangeStreamPartitionsRequest(), + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + +@pytest.mark.asyncio +async def test_generate_initial_change_stream_partitions_flattened_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.generate_initial_change_stream_partitions), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter( + [bigtable.GenerateInitialChangeStreamPartitionsResponse()] + ) + + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.generate_initial_change_stream_partitions( + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + arg = args[0].table_name + mock_val = "table_name_value" + assert arg == mock_val + arg = args[0].app_profile_id + mock_val = "app_profile_id_value" + assert arg == mock_val + + +@pytest.mark.asyncio +async def test_generate_initial_change_stream_partitions_flattened_error_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.generate_initial_change_stream_partitions( + bigtable.GenerateInitialChangeStreamPartitionsRequest(), + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + +@pytest.mark.parametrize( + "request_type", + [ + bigtable.ReadChangeStreamRequest, + dict, + ], +) +def test_read_change_stream(request_type, transport: str = "grpc"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter([bigtable.ReadChangeStreamResponse()]) + response = client.read_change_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.ReadChangeStreamRequest() + + # Establish that the response is the type that we expect. + for message in response: + assert isinstance(message, bigtable.ReadChangeStreamResponse) + + +def test_read_change_stream_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + client.read_change_stream() + call.assert_called() + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.ReadChangeStreamRequest() + + +@pytest.mark.asyncio +async def test_read_change_stream_async( + transport: str = "grpc_asyncio", request_type=bigtable.ReadChangeStreamRequest +): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[bigtable.ReadChangeStreamResponse()] + ) + response = await client.read_change_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == bigtable.ReadChangeStreamRequest() + + # Establish that the response is the type that we expect. + message = await response.read() + assert isinstance(message, bigtable.ReadChangeStreamResponse) + + +@pytest.mark.asyncio +async def test_read_change_stream_async_from_dict(): + await test_read_change_stream_async(request_type=dict) + + +def test_read_change_stream_field_headers(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = bigtable.ReadChangeStreamRequest() + + request.table_name = "table_name_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + call.return_value = iter([bigtable.ReadChangeStreamResponse()]) + client.read_change_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "table_name=table_name_value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_read_change_stream_field_headers_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = bigtable.ReadChangeStreamRequest() + + request.table_name = "table_name_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[bigtable.ReadChangeStreamResponse()] + ) + await client.read_change_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. 
+ _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "table_name=table_name_value", + ) in kw["metadata"] + + +def test_read_change_stream_flattened(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter([bigtable.ReadChangeStreamResponse()]) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.read_change_stream( + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + arg = args[0].table_name + mock_val = "table_name_value" + assert arg == mock_val + arg = args[0].app_profile_id + mock_val = "app_profile_id_value" + assert arg == mock_val + + +def test_read_change_stream_flattened_error(): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.read_change_stream( + bigtable.ReadChangeStreamRequest(), + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + +@pytest.mark.asyncio +async def test_read_change_stream_flattened_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.read_change_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter([bigtable.ReadChangeStreamResponse()]) + + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.read_change_stream( + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + arg = args[0].table_name + mock_val = "table_name_value" + assert arg == mock_val + arg = args[0].app_profile_id + mock_val = "app_profile_id_value" + assert arg == mock_val + + +@pytest.mark.asyncio +async def test_read_change_stream_flattened_error_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.read_change_stream( + bigtable.ReadChangeStreamRequest(), + table_name="table_name_value", + app_profile_id="app_profile_id_value", + ) + + def test_credentials_transport_error(): # It is an error to provide credentials and a transport instance. 
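    # For orientation, the change-stream tests above exercise the two new
    # streaming RPCs end to end against mocked transports. A rough usage sketch
    # against a live backend (credentials and the table path are placeholders,
    # not values asserted by these tests) would be:
    #
    #   client = BigtableClient(credentials=creds)
    #   request = bigtable.GenerateInitialChangeStreamPartitionsRequest(
    #       table_name="projects/my-project/instances/my-instance/tables/my-table",
    #   )
    #   for partition_response in client.generate_initial_change_stream_partitions(request):
    #       stream = client.read_change_stream(
    #           bigtable.ReadChangeStreamRequest(
    #               table_name=request.table_name,
    #               partition=partition_response.partition,
    #           )
    #       )
    #       for change in stream:
    #           ...  # each item is a bigtable.ReadChangeStreamResponse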
transport = transports.BigtableGrpcTransport( @@ -2515,6 +3067,7 @@ def test_transport_get_channel(): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, + transports.PooledBigtableGrpcAsyncIOTransport, ], ) def test_transport_adc(transport_class): @@ -2578,6 +3131,8 @@ def test_bigtable_base_transport(): "check_and_mutate_row", "ping_and_warm", "read_modify_write_row", + "generate_initial_change_stream_partitions", + "read_change_stream", ) for method in methods: with pytest.raises(NotImplementedError): @@ -2658,6 +3213,7 @@ def test_bigtable_auth_adc(): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, + transports.PooledBigtableGrpcAsyncIOTransport, ], ) def test_bigtable_transport_auth_adc(transport_class): @@ -2685,6 +3241,7 @@ def test_bigtable_transport_auth_adc(transport_class): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, + transports.PooledBigtableGrpcAsyncIOTransport, ], ) def test_bigtable_transport_auth_gdch_credentials(transport_class): @@ -2746,7 +3303,11 @@ def test_bigtable_transport_create_channel(transport_class, grpc_helpers): @pytest.mark.parametrize( "transport_class", - [transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport], + [ + transports.BigtableGrpcTransport, + transports.BigtableGrpcAsyncIOTransport, + transports.PooledBigtableGrpcAsyncIOTransport, + ], ) def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): cred = ga_credentials.AnonymousCredentials() @@ -2787,11 +3348,7 @@ def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): @pytest.mark.parametrize( - "transport_name", - [ - "grpc", - "grpc_asyncio", - ], + "transport_name", ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] ) def test_bigtable_host_no_port(transport_name): client = BigtableClient( @@ -2809,6 +3366,7 @@ def test_bigtable_host_no_port(transport_name): [ "grpc", "grpc_asyncio", + "pooled_grpc_asyncio", ], ) def test_bigtable_host_with_port(transport_name): @@ -3167,6 +3725,7 @@ def test_client_ctx(): [ (BigtableClient, transports.BigtableGrpcTransport), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport), + (PooledBigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport), ], ) def test_api_key_credentials(client_class, transport_class): From 9983e18838dc5c1fc438bf3de964bb62d59db87c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 9 Mar 2023 14:34:31 -0800 Subject: [PATCH 043/349] fixed issues in tests --- .../services/bigtable/transports/__init__.py | 3 +++ tests/unit/gapic/bigtable_v2/test_bigtable.py | 22 +++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/__init__.py b/google/cloud/bigtable_v2/services/bigtable/transports/__init__.py index 67a9abdf9..adac0ee9d 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/__init__.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/__init__.py @@ -19,15 +19,18 @@ from .base import BigtableTransport from .grpc import BigtableGrpcTransport from .grpc_asyncio import BigtableGrpcAsyncIOTransport +from .pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport # Compile a registry of transports. 
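# Illustrative note: the registry below is what lets a transport name string
# resolve to a concrete transport class at client-construction time. Roughly
# (the credentials and pool size shown here are placeholders):
#
#   transport_cls = _transport_registry["pooled_grpc_asyncio"]
#   transport = transport_cls(credentials=creds, pool_size=3)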
_transport_registry = OrderedDict() # type: Dict[str, Type[BigtableTransport]] _transport_registry["grpc"] = BigtableGrpcTransport _transport_registry["grpc_asyncio"] = BigtableGrpcAsyncIOTransport +_transport_registry["pooled_grpc_asyncio"] = PooledBigtableGrpcAsyncIOTransport __all__ = ( "BigtableTransport", "BigtableGrpcTransport", "BigtableGrpcAsyncIOTransport", + "PooledBigtableGrpcAsyncIOTransport", ) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 08a29bec5..993d6a404 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -93,7 +93,7 @@ def test__get_default_mtls_endpoint(): [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), - (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), + (BigtableAsyncClient, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_from_service_account_info(client_class, transport_name): @@ -115,7 +115,7 @@ def test_bigtable_client_from_service_account_info(client_class, transport_name) [ (transports.BigtableGrpcTransport, "grpc"), (transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), - (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), + (transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_service_account_always_use_jwt( @@ -141,7 +141,7 @@ def test_bigtable_client_service_account_always_use_jwt( [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), - (PooledBigtableAsyncClient, "pooled_grpc_asyncio"), + (BigtableAsyncClient, "pooled_grpc_asyncio"), ], ) def test_bigtable_client_from_service_account_file(client_class, transport_name): @@ -182,7 +182,7 @@ def test_bigtable_client_get_transport_class(): (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), ( - PooledBigtableAsyncClient, + BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio", ), @@ -321,7 +321,7 @@ def test_bigtable_client_client_options(client_class, transport_class, transport "true", ), ( - PooledBigtableAsyncClient, + BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio", "true", @@ -334,7 +334,7 @@ def test_bigtable_client_client_options(client_class, transport_class, transport "false", ), ( - PooledBigtableAsyncClient, + BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio", "false", @@ -446,9 +446,7 @@ def test_bigtable_client_mtls_env_auto( ) -@pytest.mark.parametrize( - "client_class", [BigtableClient, BigtableAsyncClient, PooledBigtableAsyncClient] -) +@pytest.mark.parametrize("client_class", [BigtableClient, BigtableAsyncClient]) @mock.patch.object( BigtableClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigtableClient) ) @@ -531,7 +529,7 @@ def test_bigtable_client_get_mtls_endpoint_and_cert_source(client_class): (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), ( - PooledBigtableAsyncClient, + BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio", ), @@ -571,7 +569,7 @@ def test_bigtable_client_client_options_scopes( grpc_helpers_async, ), ( - PooledBigtableAsyncClient, + BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, "grpc_asyncio", grpc_helpers_async, @@ -3725,7 +3723,7 @@ def test_client_ctx(): [ (BigtableClient, 
transports.BigtableGrpcTransport), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport), - (PooledBigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport), + (BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport), ], ) def test_api_key_credentials(client_class, transport_class): From bfeb5463bc8c4e695247fda197ef984ca5d2cc8d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 9 Mar 2023 15:21:29 -0800 Subject: [PATCH 044/349] got gapic tests passing --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 5 +- tests/unit/gapic/bigtable_v2/test_bigtable.py | 63 ++++++++++++++++--- 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 099f88e34..b64e7a335 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 099f88e3492f87a63ae26e1cc213ef0711018c4e +Subproject commit b64e7a3351804613aaf56d7a699390d8dbc70712 diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index bfccfaae3..1a96750eb 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -151,9 +151,12 @@ def __init__( Raises: google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport creation failed for any reason. - google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` and ``credentials_file`` are passed. + ValueError: if ``pool_size`` <= 0 """ + if pool_size <= 0: + raise ValueError(f"invalid pool_size: {pool_size}") self._ssl_channel_credentials = ssl_channel_credentials self._stubs: Dict[Tuple[aio.Channel, str], Callable] = {} self._next_idx = 0 diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 993d6a404..e5d24184f 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -571,7 +571,7 @@ def test_bigtable_client_client_options_scopes( ( BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport, - "grpc_asyncio", + "pooled_grpc_asyncio", grpc_helpers_async, ), ], @@ -3301,11 +3301,7 @@ def test_bigtable_transport_create_channel(transport_class, grpc_helpers): @pytest.mark.parametrize( "transport_class", - [ - transports.BigtableGrpcTransport, - transports.BigtableGrpcAsyncIOTransport, - transports.PooledBigtableGrpcAsyncIOTransport, - ], + [transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport], ) def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): cred = ga_credentials.AnonymousCredentials() @@ -3345,6 +3341,60 @@ def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): ) +@pytest.mark.parametrize( + "transport_class", [transports.PooledBigtableGrpcAsyncIOTransport] +) +def test_bigtable_pooled_grpc_transport_client_cert_source_for_mtls(transport_class): + cred = ga_credentials.AnonymousCredentials() + + # test with invalid pool size + with pytest.raises(ValueError): + transport_class( + host="squid.clam.whelk", + credentials=cred, + pool_size=0, + ) + + # Check ssl_channel_credentials is used if provided. 
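    # Informal sketch of the constructor contract exercised by this test
    # (the host value is a placeholder): pool_size must be >= 1, and one
    # channel is created per pool slot, e.g.
    #
    #   transports.PooledBigtableGrpcAsyncIOTransport(
    #       host="bigtable.googleapis.com",
    #       credentials=cred,
    #       pool_size=3,
    #   )
    #
    # creates three channels, while pool_size=0 raises ValueError (checked above).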
+ for pool_num in range(1, 5): + with mock.patch.object( + transport_class, "create_channel" + ) as mock_create_channel: + mock_ssl_channel_creds = mock.Mock() + transport_class( + host="squid.clam.whelk", + credentials=cred, + ssl_channel_credentials=mock_ssl_channel_creds, + pool_size=pool_num, + ) + mock_create_channel.assert_called_with( + "squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=None, + ssl_credentials=mock_ssl_channel_creds, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert mock_create_channel.call_count == pool_num + + # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls + # is used. + with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): + with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: + transport_class( + credentials=cred, + client_cert_source_for_mtls=client_cert_source_callback, + ) + expected_cert, expected_key = client_cert_source_callback() + mock_ssl_cred.assert_called_once_with( + certificate_chain=expected_cert, private_key=expected_key + ) + + @pytest.mark.parametrize( "transport_name", ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] ) @@ -3723,7 +3773,6 @@ def test_client_ctx(): [ (BigtableClient, transports.BigtableGrpcTransport), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport), - (BigtableAsyncClient, transports.PooledBigtableGrpcAsyncIOTransport), ], ) def test_api_key_credentials(client_class, transport_class): From dba7a3cb83e8fc27eee2dfd00d1eebbd9e43933a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 10 Mar 2023 15:31:13 -0800 Subject: [PATCH 045/349] reworked the client to take instance at init --- google/cloud/bigtable/client.py | 71 ++++++++++++--------------------- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e85606ef9..8eef06948 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -26,6 +26,7 @@ PooledBigtableGrpcAsyncIOTransport, ) from google.cloud.client import ClientWithProject +from google.api_core.exceptions import GoogleAPICallError import google.auth.credentials @@ -42,6 +43,7 @@ class BigtableDataClient(ClientWithProject): def __init__( self, + instance_id: str, *, project: str | None = None, pool_size: int = 3, @@ -55,6 +57,7 @@ def __init__( Create a client instance for the Bigtable Data API Args: + instance_id: The Bigram instance ID to associate with this client project: the project which the client acts on behalf of. If not passed, falls back to the default inferred from the environment. 
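A sketch of the surface after this patch (project, instance, and table IDs are
placeholders): the client now binds to a single Bigtable instance at
construction time, and tables are obtained without re-passing the instance.

    import asyncio

    from google.cloud.bigtable.client import BigtableDataClient

    async def main():
        client = BigtableDataClient("my-instance", project="my-project")
        # the table inherits the client's project/instance binding
        table = await client.get_table("my-table")

    asyncio.run(main())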
@@ -88,12 +91,13 @@ def __init__( self.transport: PooledBigtableGrpcAsyncIOTransport = cast( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) - self._active_instances: set[str] = set() + self.instance_id = instance_id # background tasks will be started when an instance is registered # with the client in `get_table` self._channel_refresh_tasks: list[asyncio.Task[None]] = [] + self._channel_init_time = time.time() - async def _ping_and_warm_instances(self, channel: grpc.aio.Channel) -> None: + async def _ping_and_warm_instance(self, channel: grpc.aio.Channel) -> None: """ Prepares the backend for requests on a channel @@ -107,8 +111,10 @@ async def _ping_and_warm_instances(self, channel: grpc.aio.Channel) -> None: ping_rpc = channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarmChannel" ) - tasks = [ping_rpc({"name": n}) for n in self._active_instances] - return await asyncio.gather(*tasks, return_exceptions=True) + try: + return await ping_rpc({"name": f"projects/{self.project}/instances/{instance_id}"}) + except GoogleAPICallError as e: + return e async def _manage_channel( self, @@ -130,10 +136,12 @@ async def _manage_channel( grace_period: time to allow previous channel to serve existing requests before closing, in seconds """ - # warm the current channel immediately - channel = self.transport.channel_pool[channel_idx] - await self._ping_and_warm_instances(channel) - next_sleep = refresh_interval + first_refresh = self._channel_init_time + refresh_interval + next_sleep = first_refresh - time.time() + if next_sleep > 0: + # warm the current channel immediately + channel = self.transport.channel_pool[channel_idx] + await self._ping_and_warm_instance(channel) # continuously refresh the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) @@ -150,86 +158,57 @@ async def _manage_channel( ("grpc.max_receive_message_length", -1), ], ) - await self._ping_and_warm_instances(channel) + await self._ping_and_warm_instance(channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() await self.transport.replace_channel(channel_idx, grace_period, new_channel) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) - async def register_instance(self, instance_id: str): + async def start_channel_refresh_tasks(self) -> None: """ - Registers an instance with the client, and warms the channel pool - for the instance - - The client will periodically refresh grpc channel pool used to make - requests, and new channels will be warmed for each registered instance + Starts background tasks to periodically refresh and warm grpc channels - Channels will not be refreshed unless at least one instance is registered + Runs continuously until the client is closed """ - instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._active_instances.add(instance_name) - if self._channel_refresh_tasks: - # refresh tasks already running - # call ping and warm on all existing channels - for channel in self.transport.channel_pool: - await self._ping_and_warm_instances(channel) - else: - # refresh tasks aren't active. 
start them as background tasks + if not self._channel_refresh_tasks: for channel_idx in range(len(self.transport.channel_pool)): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) self._channel_refresh_tasks.append(refresh_task) - async def remove_instance_registration(self, instance_id: str): - """ - Removes an instance from the client's registered instances, to prevent - warming new channels for the instance - """ - instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._active_instances.remove(instance_name) - async def get_table( self, - instance_id: str, table_id: str, app_profile_id: str | None = None, - *, - register_instance: bool = True, ) -> Table: """ Returns a table instance for making data API requests Args: - instance_id: The ID of the instance that owns the table. table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles - register_instance: if True, the client will call `register_instance` on - the `instance_id`, to periodically warm and refresh the channel - pool for the specified instance """ - if register_instance: - await self.register_instance(instance_id) - return Table(self, instance_id, table_id, app_profile_id) + # ensure channel refresh tasks have started + await start_channel_refresh_tasks() + return Table(self, table_id, app_profile_id) class Table: """ Main Data API surface - Table object maintains instance_id, table_id, and app_profile_id context, and passes them with + Table object maintains table_id, and app_profile_id context, and passes them with each call """ def __init__( self, client: BigtableDataClient, - instance_id: str, table_id: str, app_profile_id: str | None = None, ): self.client = client - self.instance_id = instance_id self.table_id = table_id self.app_profile_id = app_profile_id From 3ae67224c62ed18ee45de1dc5c6e5a439ba22d8c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 10 Mar 2023 16:07:59 -0800 Subject: [PATCH 046/349] moved background setup back into client init, with warning if no async loop --- google/cloud/bigtable/client.py | 49 +++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 8eef06948..d7947b8ff 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -20,6 +20,7 @@ import asyncio import grpc import time +import warnings from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -92,12 +93,34 @@ def __init__( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) self.instance_id = instance_id - # background tasks will be started when an instance is registered - # with the client in `get_table` - self._channel_refresh_tasks: list[asyncio.Task[None]] = [] + # attempt to start background tasks self._channel_init_time = time.time() + self._channel_refresh_tasks: list[asyncio.Task[None]] = [] + try: + self.start_background_channel_refresh() + except RuntimeError: + warnings.warn( + "BigtableDataClient should be started in an " + "asyncio event loop. 
Channel refresh will not be started" + ) + + def start_background_channel_refresh(self) -> None: + """ + Starts a background task to ping and warm each channel in the pool - async def _ping_and_warm_instance(self, channel: grpc.aio.Channel) -> None: + Raises: + - RuntimeError if not called in an asyncio event loop + """ + if not self._channel_refresh_tasks: + # raise RuntimeError if there is no event loop + asyncio.get_running_loop() + for channel_idx in range(len(self.transport.channel_pool)): + refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) + self._channel_refresh_tasks.append(refresh_task) + + async def _ping_and_warm_instance( + self, channel: grpc.aio.Channel + ) -> Exception | None: """ Prepares the backend for requests on a channel @@ -112,7 +135,9 @@ async def _ping_and_warm_instance(self, channel: grpc.aio.Channel) -> None: "/google.bigtable.v2.Bigtable/PingAndWarmChannel" ) try: - return await ping_rpc({"name": f"projects/{self.project}/instances/{instance_id}"}) + return await ping_rpc( + {"name": f"projects/{self.project}/instances/{self.instance_id}"} + ) except GoogleAPICallError as e: return e @@ -165,18 +190,7 @@ async def _manage_channel( # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) - async def start_channel_refresh_tasks(self) -> None: - """ - Starts background tasks to periodically refresh and warm grpc channels - - Runs continuously until the client is closed - """ - if not self._channel_refresh_tasks: - for channel_idx in range(len(self.transport.channel_pool)): - refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) - self._channel_refresh_tasks.append(refresh_task) - - async def get_table( + def get_table( self, table_id: str, app_profile_id: str | None = None, @@ -190,7 +204,6 @@ async def get_table( https://cloud.google.com/bigtable/docs/app-profiles """ # ensure channel refresh tasks have started - await start_channel_refresh_tasks() return Table(self, table_id, app_profile_id) From b9f2b0d8e2e0419ebb90ac47a907d6f9de9afe79 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 10 Mar 2023 16:22:33 -0800 Subject: [PATCH 047/349] improved comment --- google/cloud/bigtable/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d7947b8ff..1381f8aa3 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -59,6 +59,7 @@ def __init__( Args: instance_id: The Bigram instance ID to associate with this client + instance_id is combined with project to fully specify the instance project: the project which the client acts on behalf of. If not passed, falls back to the default inferred from the environment. From d673f1b68bf11050aaf3bc998a199e4594591986 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 09:31:31 -0700 Subject: [PATCH 048/349] implemented query class --- google/cloud/bigtable/read_rows_query.py | 210 +++++++++++++++++++++-- 1 file changed, 198 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 64583b2d7..3f4ef1ebb 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -13,36 +13,162 @@ # limitations under the License. 
# from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any +from .row_response import row_key +from dataclasses import dataclass +from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: - from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable import RowKeySamples +@dataclass +class _RangePoint: + # model class for a point in a row range + key: row_key + is_inclusive: bool + + class ReadRowsQuery: """ Class to encapsulate details of a read row request """ def __init__( - self, row_keys: list[str | bytes] | str | bytes | None = None, limit=None + self, + row_keys: list[str | bytes] | str | bytes | None = None, + limit: int | None = None, + row_filter: RowFilter | dict[str, Any] | None = None, ): - pass + """ + Create a new ReadRowsQuery - def set_limit(self, limit: int) -> ReadRowsQuery: - raise NotImplementedError + Args: + - row_keys: a list of row keys to include in the query + - limit: the maximum number of rows to return. None or 0 means no limit + default: None (no limit) + - row_filter: a RowFilter to apply to the query + """ + self.row_keys: set[bytes] = set() + self.row_ranges: list[tuple[_RangePoint | None, _RangePoint | None]] = [] + if row_keys: + self.add_rows(row_keys) + self.limit: int | None = limit + self.filter: RowFilter | dict[str, Any] = row_filter - def set_filter(self, filter: "RowFilter") -> ReadRowsQuery: - raise NotImplementedError + def set_limit(self, new_limit: int | None): + """ + Set the maximum number of rows to return by this query. - def add_rows(self, row_id_list: list[str]) -> ReadRowsQuery: - raise NotImplementedError + None or 0 means no limit + + Args: + - new_limit: the new limit to apply to this query + Returns: + - a reference to this query for chaining + Raises: + - ValueError if new_limit is < 0 + """ + if new_limit is not None and new_limit < 0: + raise ValueError("limit must be >= 0") + self._limit = new_limit + return self + + def set_filter( + self, row_filter: RowFilter | dict[str, Any] | None + ) -> ReadRowsQuery: + """ + Set a RowFilter to apply to this query + + Args: + - row_filter: a RowFilter to apply to this query + Can be a RowFilter object or a dict representation + Returns: + - a reference to this query for chaining + """ + if not ( + isinstance(row_filter, dict) + or isinstance(row_filter, RowFilter) + or row_filter is None + ): + raise ValueError( + "row_filter must be a RowFilter or corresponding dict representation" + ) + self._filter = row_filter + return self + + def add_rows(self, row_keys: list[str | bytes] | str | bytes) -> ReadRowsQuery: + """ + Add a list of row keys to this query + + Args: + - row_keys: a list of row keys to add to this query + Returns: + - a reference to this query for chaining + Raises: + - ValueError if an input is not a string or bytes + """ + if not isinstance(row_keys, list): + row_keys = [row_keys] + update_set = set() + for k in row_keys: + if isinstance(k, str): + k = k.encode() + elif not isinstance(k, bytes): + raise ValueError("row_keys must be strings or bytes") + update_set.add(k) + self.row_keys.update(update_set) + return self def add_range( - self, start_key: str | bytes | None = None, end_key: str | bytes | None = None + self, + start_key: str | bytes | None = None, + end_key: str | bytes | None = None, + start_is_inclusive: bool | None = None, + end_is_inclusive: bool | None = None, ) -> ReadRowsQuery: - raise NotImplementedError + """ + Add a range of row keys to this query. 
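
        Example (hypothetical keys, for illustration only):
            query.add_range("user#100", "user#200")             # start inclusive, end exclusive
            query.add_range(b"a", b"m", end_is_inclusive=True)  # include the end key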
+ + Args: + - start_key: the start of the range + if None, start_key is interpreted as the empty string, inclusive + - end_key: the end of the range + if None, end_key is interpreted as the infinite row key, exclusive + - start_is_inclusive: if True, the start key is included in the range + defaults to True if None. Must not be included if start_key is None + - end_is_inclusive: if True, the end key is included in the range + defaults to False if None. Must not be included if end_key is None + """ + # check for invalid combinations of arguments + if start_is_inclusive is None: + start_is_inclusive = True + elif start_key is None: + raise ValueError( + "start_is_inclusive must not be included if start_key is None" + ) + if end_is_inclusive is None: + end_is_inclusive = False + elif end_key is None: + raise ValueError("end_is_inclusive must not be included if end_key is None") + # ensure that start_key and end_key are bytes + if isinstance(start_key, str): + start_key = start_key.encode() + elif start_key is not None and not isinstance(start_key, bytes): + raise ValueError("start_key must be a string or bytes") + if isinstance(end_key, str): + end_key = end_key.encode() + elif end_key is not None and not isinstance(end_key, bytes): + raise ValueError("end_key must be a string or bytes") + + start_pt = ( + _RangePoint(start_key, start_is_inclusive) + if start_key is not None + else None + ) + end_pt = _RangePoint(end_key, end_is_inclusive) if end_key is not None else None + self.row_ranges.append((start_pt, end_pt)) + return self def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: """ @@ -54,3 +180,63 @@ def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery query (if possible) """ raise NotImplementedError + + def to_dict(self) -> dict[str, Any]: + """ + Convert this query into a dictionary that can be used to construct a + ReadRowsRequest protobuf + """ + ranges = [] + for start, end in self.row_ranges: + new_range = {} + if start is not None: + key = "start_key_closed" if start.is_inclusive else "start_key_open" + new_range[key] = start.key + if end is not None: + key = "end_key_closed" if end.is_inclusive else "end_key_open" + new_range[key] = end.key + ranges.append(new_range) + row_keys = list(self.row_keys) + row_keys.sort() + row_set = {"row_keys": row_keys, "row_ranges": ranges} + final_dict: dict[str, Any] = { + "rows": row_set, + } + dict_filter = ( + self.filter.to_dict() if isinstance(self.filter, RowFilter) else self.filter + ) + if dict_filter: + final_dict["filter"] = dict_filter + if self.limit is not None: + final_dict["rows_limit"] = self.limit + return final_dict + + # Support limit and filter as properties + + @property + def limit(self) -> int | None: + """ + Getter implementation for limit property + """ + return self._limit + + @limit.setter + def limit(self, new_limit: int | None): + """ + Setter implementation for limit property + """ + self.set_limit(new_limit) + + @property + def filter(self): + """ + Getter implemntation for filter property + """ + return self._filter + + @filter.setter + def filter(self, row_filter: RowFilter | dict[str, Any] | None): + """ + Setter implementation for filter property + """ + self.set_filter(row_filter) From 5fe8c778948c8db5b2f134049900ef1b1013b0dc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 09:31:56 -0700 Subject: [PATCH 049/349] added query tests --- tests/unit/test_read_rows_query.py | 284 +++++++++++++++++++++++++++++ 1 file changed, 284 
insertions(+) create mode 100644 tests/unit/test_read_rows_query.py diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py new file mode 100644 index 000000000..eb924edaa --- /dev/null +++ b/tests/unit/test_read_rows_query.py @@ -0,0 +1,284 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +TEST_ROWS = [ + "row_key_1", + b"row_key_2", +] + + +class TestReadRowsQuery(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.read_rows_query import ReadRowsQuery + + return ReadRowsQuery + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_defaults(self): + query = self._make_one() + self.assertEqual(query.row_keys, set()) + self.assertEqual(query.row_ranges, []) + self.assertEqual(query.filter, None) + self.assertEqual(query.limit, None) + + def test_ctor_explicit(self): + from google.cloud.bigtable.row_filters import RowFilterChain + + filter_ = RowFilterChain() + query = self._make_one(["row_key_1", "row_key_2"], limit=10, row_filter=filter_) + self.assertEqual(len(query.row_keys), 2) + self.assertIn("row_key_1".encode(), query.row_keys) + self.assertIn("row_key_2".encode(), query.row_keys) + self.assertEqual(query.row_ranges, []) + self.assertEqual(query.filter, filter_) + self.assertEqual(query.limit, 10) + + def test_ctor_invalid_limit(self): + with self.assertRaises(ValueError): + self._make_one(limit=-1) + + def test_set_filter(self): + from google.cloud.bigtable.row_filters import RowFilterChain + + filter1 = RowFilterChain() + query = self._make_one() + self.assertEqual(query.filter, None) + result = query.set_filter(filter1) + self.assertEqual(query.filter, filter1) + self.assertEqual(result, query) + filter2 = RowFilterChain() + result = query.set_filter(filter2) + self.assertEqual(query.filter, filter2) + result = query.set_filter(None) + self.assertEqual(query.filter, None) + self.assertEqual(result, query) + query.filter = RowFilterChain() + self.assertEqual(query.filter, RowFilterChain()) + with self.assertRaises(ValueError): + query.filter = 1 + + def test_set_filter_dict(self): + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest + + filter1 = RowSampleFilter(0.5) + filter1_dict = filter1.to_dict() + query = self._make_one() + self.assertEqual(query.filter, None) + result = query.set_filter(filter1_dict) + self.assertEqual(query.filter, filter1_dict) + self.assertEqual(result, query) + output = query.to_dict() + self.assertEqual(output["filter"], filter1_dict) + proto_output = ReadRowsRequest(**output) + self.assertEqual(proto_output.filter, filter1.to_pb()) + + query.filter = None + self.assertEqual(query.filter, None) + + def test_set_limit(self): + query = self._make_one() + self.assertEqual(query.limit, None) + result = query.set_limit(10) + self.assertEqual(query.limit, 10) + self.assertEqual(result, query) + query.limit = 9 
+ self.assertEqual(query.limit, 9) + result = query.set_limit(0) + self.assertEqual(query.limit, 0) + self.assertEqual(result, query) + with self.assertRaises(ValueError): + query.set_limit(-1) + with self.assertRaises(ValueError): + query.limit = -100 + + def test_add_rows_str(self): + query = self._make_one() + self.assertEqual(query.row_keys, set()) + input_str = "test_row" + result = query.add_rows(input_str) + self.assertEqual(len(query.row_keys), 1) + self.assertIn(input_str.encode(), query.row_keys) + self.assertEqual(result, query) + input_str2 = "test_row2" + result = query.add_rows(input_str2) + self.assertEqual(len(query.row_keys), 2) + self.assertIn(input_str.encode(), query.row_keys) + self.assertIn(input_str2.encode(), query.row_keys) + self.assertEqual(result, query) + + def test_add_rows_bytes(self): + query = self._make_one() + self.assertEqual(query.row_keys, set()) + input_bytes = b"test_row" + result = query.add_rows(input_bytes) + self.assertEqual(len(query.row_keys), 1) + self.assertIn(input_bytes, query.row_keys) + self.assertEqual(result, query) + input_bytes2 = b"test_row2" + result = query.add_rows(input_bytes2) + self.assertEqual(len(query.row_keys), 2) + self.assertIn(input_bytes, query.row_keys) + self.assertIn(input_bytes2, query.row_keys) + self.assertEqual(result, query) + + def test_add_rows_batch(self): + query = self._make_one() + self.assertEqual(query.row_keys, set()) + input_batch = ["test_row", b"test_row2", "test_row3"] + result = query.add_rows(input_batch) + self.assertEqual(len(query.row_keys), 3) + self.assertIn(b"test_row", query.row_keys) + self.assertIn(b"test_row2", query.row_keys) + self.assertIn(b"test_row3", query.row_keys) + self.assertEqual(result, query) + # test adding another batch + query.add_rows(["test_row4", b"test_row5"]) + self.assertEqual(len(query.row_keys), 5) + self.assertIn(input_batch[0].encode(), query.row_keys) + self.assertIn(input_batch[1], query.row_keys) + self.assertIn(input_batch[2].encode(), query.row_keys) + self.assertIn(b"test_row4", query.row_keys) + self.assertIn(b"test_row5", query.row_keys) + + def test_add_rows_invalid(self): + query = self._make_one() + with self.assertRaises(ValueError): + query.add_rows(1) + with self.assertRaises(ValueError): + query.add_rows(["s", 0]) + + def test_duplicate_rows(self): + # should only hold one of each input key + key_1 = b"test_row" + key_2 = b"test_row2" + query = self._make_one(row_keys=[key_1, key_1, key_2]) + self.assertEqual(len(query.row_keys), 2) + self.assertIn(key_1, query.row_keys) + self.assertIn(key_2, query.row_keys) + key_3 = "test_row3" + query.add_rows([key_3 for _ in range(10)]) + self.assertEqual(len(query.row_keys), 3) + + def test_add_range(self): + # test with start and end keys + query = self._make_one() + self.assertEqual(query.row_ranges, []) + result = query.add_range("test_row", "test_row2") + self.assertEqual(len(query.row_ranges), 1) + self.assertEqual(query.row_ranges[0][0].key, "test_row".encode()) + self.assertEqual(query.row_ranges[0][1].key, "test_row2".encode()) + self.assertEqual(query.row_ranges[0][0].is_inclusive, True) + self.assertEqual(query.row_ranges[0][1].is_inclusive, False) + self.assertEqual(result, query) + # test with start key only + result = query.add_range("test_row3") + self.assertEqual(len(query.row_ranges), 2) + self.assertEqual(query.row_ranges[1][0].key, "test_row3".encode()) + self.assertEqual(query.row_ranges[1][1], None) + self.assertEqual(result, query) + # test with end key only + result = 
query.add_range(start_key=None, end_key="test_row5") + self.assertEqual(len(query.row_ranges), 3) + self.assertEqual(query.row_ranges[2][0], None) + self.assertEqual(query.row_ranges[2][1].key, "test_row5".encode()) + self.assertEqual(query.row_ranges[2][1].is_inclusive, False) + # test with start and end keys and inclusive flags + result = query.add_range(b"test_row6", b"test_row7", False, True) + self.assertEqual(len(query.row_ranges), 4) + self.assertEqual(query.row_ranges[3][0].key, b"test_row6") + self.assertEqual(query.row_ranges[3][1].key, b"test_row7") + self.assertEqual(query.row_ranges[3][0].is_inclusive, False) + self.assertEqual(query.row_ranges[3][1].is_inclusive, True) + # test with nothing passed + result = query.add_range() + self.assertEqual(len(query.row_ranges), 5) + self.assertEqual(query.row_ranges[4][0], None) + self.assertEqual(query.row_ranges[4][1], None) + # test with inclusive flags only + with self.assertRaises(ValueError): + query.add_range(start_is_inclusive=True, end_is_inclusive=True) + with self.assertRaises(ValueError): + query.add_range(start_is_inclusive=False, end_is_inclusive=False) + with self.assertRaises(ValueError): + query.add_range(start_is_inclusive=False) + with self.assertRaises(ValueError): + query.add_range(end_is_inclusive=True) + + def test_to_dict_rows_default(self): + # dictionary should be in rowset proto format + from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest + + query = self._make_one() + output = query.to_dict() + self.assertTrue(isinstance(output, dict)) + self.assertEqual(len(output.keys()), 1) + expected = {"rows": {"row_keys": [], "row_ranges": []}} + self.assertEqual(output, expected) + + request_proto = ReadRowsRequest(**output) + self.assertEqual(request_proto.rows.row_keys, []) + self.assertEqual(request_proto.rows.row_ranges, []) + self.assertFalse(request_proto.filter) + self.assertEqual(request_proto.rows_limit, 0) + + def test_to_dict_rows_populated(self): + # dictionary should be in rowset proto format + from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest + from google.cloud.bigtable.row_filters import PassAllFilter + + row_filter = PassAllFilter(False) + query = self._make_one(limit=100, row_filter=row_filter) + query.add_range("test_row", "test_row2") + query.add_range("test_row3") + query.add_range(start_key=None, end_key="test_row5") + query.add_range(b"test_row6", b"test_row7", False, True) + query.add_range() + query.add_rows(["test_row", b"test_row2", "test_row3"]) + query.add_rows(["test_row3", b"test_row4"]) + output = query.to_dict() + self.assertTrue(isinstance(output, dict)) + request_proto = ReadRowsRequest(**output) + rowset_proto = request_proto.rows + # check rows + self.assertEqual(len(rowset_proto.row_keys), 4) + self.assertEqual(rowset_proto.row_keys[0], b"test_row") + self.assertEqual(rowset_proto.row_keys[1], b"test_row2") + self.assertEqual(rowset_proto.row_keys[2], b"test_row3") + self.assertEqual(rowset_proto.row_keys[3], b"test_row4") + # check ranges + self.assertEqual(len(rowset_proto.row_ranges), 5) + self.assertEqual(rowset_proto.row_ranges[0].start_key_closed, b"test_row") + self.assertEqual(rowset_proto.row_ranges[0].end_key_open, b"test_row2") + self.assertEqual(rowset_proto.row_ranges[1].start_key_closed, b"test_row3") + self.assertEqual(rowset_proto.row_ranges[1].end_key_open, b"") + self.assertEqual(rowset_proto.row_ranges[2].start_key_closed, b"") + self.assertEqual(rowset_proto.row_ranges[2].end_key_open, b"test_row5") + 
self.assertEqual(rowset_proto.row_ranges[3].start_key_open, b"test_row6") + self.assertEqual(rowset_proto.row_ranges[3].end_key_closed, b"test_row7") + self.assertEqual(rowset_proto.row_ranges[4].start_key_closed, b"") + self.assertEqual(rowset_proto.row_ranges[4].end_key_open, b"") + # check limit + self.assertEqual(request_proto.rows_limit, 100) + # check filter + filter_proto = request_proto.filter + self.assertEqual(filter_proto, row_filter.to_pb()) + + def test_shard(self): + pass From ec47f91856382dc7be183b7267ececed55c4cfa0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 09:48:33 -0700 Subject: [PATCH 050/349] implemented row and cell response --- google/cloud/bigtable/row_response.py | 347 +++++++++++++++++++++++--- 1 file changed, 318 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index be6d8c505..af714e147 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -15,7 +15,8 @@ from __future__ import annotations from collections import OrderedDict -from typing import Sequence +from typing import Sequence, Generator, Mapping, overload, Union, List, Tuple, Any +from functools import total_ordering # Type aliases used internally for readability. row_key = bytes @@ -24,7 +25,12 @@ row_value = bytes -class RowResponse(Sequence["CellResponse"]): +class RowResponse( + Sequence["CellResponse"], + Mapping[ + Union[family_id, Tuple[family_id, Union[qualifier, str]]], List["CellResponse"] + ], +): """ Model class for row data returned from server @@ -36,47 +42,237 @@ class RowResponse(Sequence["CellResponse"]): cells = row["family", "qualifier"] """ - def __init__(self, key: row_key, cells: list[CellResponse]): + def __init__( + self, + key: row_key, + cells: list[CellResponse] + | dict[tuple[family_id, qualifier], list[dict[str, Any]]], + ): + """Expected to be used internally only""" self.row_key = key - self.cells: OrderedDict[ - family_id, OrderedDict[qualifier, list[CellResponse]] + self._cells_map: dict[ + family_id, dict[qualifier, list[CellResponse]] ] = OrderedDict() - """Expected to be used internally only""" - pass + self._cells_list: list[CellResponse] = [] + if isinstance(cells, dict): + # handle dict input + tmp_list = [] + for (family, qualifier), cell_list in cells.items(): + for cell_dict in cell_list: + cell_obj = CellResponse._from_dict( + key, family, qualifier, cell_dict + ) + tmp_list.append(cell_obj) + cells = tmp_list + # add cells to internal stores using Bigtable native ordering + for cell in sorted(cells): + if cell.row_key != self.row_key: + raise ValueError( + f"CellResponse row_key ({cell.row_key!r}) does not match RowResponse key ({self.row_key!r})" + ) + if cell.family not in self._cells_map: + self._cells_map[cell.family] = OrderedDict() + if cell.column_qualifier not in self._cells_map[cell.family]: + self._cells_map[cell.family][cell.column_qualifier] = [] + self._cells_map[cell.family][cell.column_qualifier].append(cell) + self._cells_list.append(cell) def get_cells( - self, family: str | None, qualifer: str | bytes | None + self, family: str | None = None, qualifier: str | bytes | None = None ) -> list[CellResponse]: """ Returns cells sorted in Bigtable native order: - Family lexicographically ascending - - Qualifier lexicographically ascending + - Qualifier ascending - Timestamp in reverse chronological order If family or qualifier not passed, will include all - Syntactic sugar: cells = row["family", "qualifier"] + Can 
also be accessed through indexing: + cells = row["family", "qualifier"] + cells = row["family"] """ - raise NotImplementedError + if family is None: + if qualifier is not None: + # get_cells(None, "qualifier") is not allowed + raise ValueError("Qualifier passed without family") + else: + # return all cells on get_cells() + return self._cells_list + if qualifier is None: + # return all cells in family on get_cells(family) + return list(self._get_all_from_family(family)) + if isinstance(qualifier, str): + qualifier = qualifier.encode("utf-8") + # return cells in family and qualifier on get_cells(family, qualifier) + if family not in self._cells_map: + raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") + if qualifier not in self._cells_map[family]: + raise ValueError( + f"Qualifier '{qualifier!r}' not found in family '{family}' in row '{self.row_key!r}'" + ) + return self._cells_map[family][qualifier] - def get_index(self) -> dict[family_id, list[qualifier]]: + def _get_all_from_family( + self, family: family_id + ) -> Generator[CellResponse, None, None]: """ - Returns a list of family and qualifiers for the object + Returns all cells in the row """ - raise NotImplementedError + if family not in self._cells_map: + raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") + qualifier_dict = self._cells_map.get(family, {}) + for cell_batch in qualifier_dict.values(): + for cell in cell_batch: + yield cell - def __str__(self): + def __str__(self) -> str: """ Human-readable string representation - (family, qualifier) cells - (ABC, XYZ) [b"123", b"456" ...(+5)] - (DEF, XYZ) [b"123"] - (GHI, XYZ) [b"123", b"456" ...(+2)] + { + (family='fam', qualifier=b'col'): [b'value', (+1 more),], + (family='fam', qualifier=b'col2'): [b'other'], + } + """ + output = ["{"] + for key in self.keys(): + if len(self[key]) == 0: + output.append(f" {key}: []") + elif len(self[key]) == 1: + output.append( + f" (family='{key[0]}', qualifier={key[1]}): [{self[key][0]}]," + ) + else: + output.append( + f" (family='{key[0]}', qualifier={key[1]}): [{self[key][0]}, (+{len(self[key])-1} more)]," + ) + output.append("}") + return "\n".join(output) + + def __repr__(self): + cell_str_buffer = ["{"] + for key, cell_list in self.items(): + repr_list = [cell.to_dict(use_nanoseconds=True) for cell in cell_list] + cell_str_buffer.append(f" ('{key[0]}', {key[1]}): {repr_list},") + cell_str_buffer.append("}") + cell_str = "\n".join(cell_str_buffer) + output = f"RowResponse(key={self.row_key!r}, cells={cell_str})" + return output + + def to_dict(self) -> dict[str, Any]: """ - raise NotImplementedError + Returns a dictionary representation of the cell in the Bigtable Row + proto format + + https://cloud.google.com/bigtable/docs/reference/data/rpc/google.bigtable.v2#row + """ + families_list: list[dict[str, Any]] = [] + for family in self._cells_map: + column_list: list[dict[str, Any]] = [] + for qualifier in self._cells_map[family]: + cells_list: list[dict[str, Any]] = [] + for cell in self._cells_map[family][qualifier]: + cells_list.append(cell.to_dict()) + column_list.append({"qualifier": qualifier, "cells": cells_list}) + families_list.append({"name": family, "columns": column_list}) + return {"key": self.row_key, "families": families_list} + + # Sequence and Mapping methods + def __iter__(self): + # iterate as a sequence; yield all cells + for cell in self._cells_list: + yield cell + + def __contains__(self, item): + if isinstance(item, family_id): + # check if family key is in RowResponse + 
return item in self._cells_map + elif ( + isinstance(item, tuple) + and isinstance(item[0], family_id) + and isinstance(item[1], (qualifier, str)) + ): + # check if (family, qualifier) pair is in RowResponse + qualifer = item[1] if isinstance(item[1], bytes) else item[1].encode() + return item[0] in self._cells_map and qualifer in self._cells_map[item[0]] + # check if CellResponse is in RowResponse + return item in self._cells_list + + @overload + def __getitem__( + self, + index: family_id | tuple[family_id, qualifier | str], + ) -> List[CellResponse]: + # overload signature for type checking + pass + + @overload + def __getitem__(self, index: int, /) -> CellResponse: + # overload signature for type checking + pass + + @overload + def __getitem__(self, index: slice) -> list[CellResponse]: + # overload signature for type checking + pass + + def __getitem__(self, index): + if isinstance(index, family_id): + return self.get_cells(family=index) + elif ( + isinstance(index, tuple) + and isinstance(index[0], family_id) + and isinstance(index[1], (qualifier, str)) + ): + return self.get_cells(family=index[0], qualifier=index[1]) + elif isinstance(index, int) or isinstance(index, slice): + # index is int or slice + return self._cells_list[index] + else: + raise TypeError( + "Index must be family_id, (family_id, qualifier), int, or slice" + ) + + def __len__(self): + return len(self._cells_list) + + def keys(self): + key_list = [] + for family in self._cells_map: + for qualifier in self._cells_map[family]: + key_list.append((family, qualifier)) + return key_list + + def values(self): + return self._cells_list + def items(self): + for key in self.keys(): + yield key, self[key] + def __eq__(self, other): + # for performance reasons, check row metadata + # before checking individual cells + if not isinstance(other, RowResponse): + return False + if self.row_key != other.row_key: + return False + if len(self._cells_list) != len(other._cells_list): + return False + keys, other_keys = self.keys(), other.keys() + if keys != other_keys: + return False + for key in keys: + if len(self[key]) != len(other[key]): + return False + # compare individual cell lists + if self._cells_list != other._cells_list: + return False + return True + + +@total_ordering class CellResponse: """ Model class for cell data @@ -91,20 +287,51 @@ def __init__( value: row_value, row: row_key, family: family_id, - column_qualifier: qualifier, + column_qualifier: qualifier | str, + timestamp_ns: int, labels: list[str] | None = None, - timestamp: int | None = None, ): + """ + CellResponse constructor + + CellResponse objects are not intended to be constructed by users. + They are returned by the Bigtable backend. + """ self.value = value self.row_key = row self.family = family + if isinstance(column_qualifier, str): + column_qualifier = column_qualifier.encode() self.column_qualifier = column_qualifier - self.labels = labels - self.timestamp = timestamp + self.timestamp_ns = timestamp_ns + self.labels = labels if labels is not None else [] - def decode_value(self, encoding="UTF-8", errors=None) -> str: - """decode bytes to string""" - return self.value.decode(encoding, errors) + @staticmethod + def _from_dict( + row_key: bytes, family: str, qualifier: bytes, cell_dict: dict[str, Any] + ) -> CellResponse: + """ + Helper function to create CellResponse from a dictionary + + CellResponse objects are not intended to be constructed by users. + They are returned by the Bigtable backend. 
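
        The expected cell_dict shape is, for illustration:
            {"value": b"cell-value", "timestamp_micros": 1234567890000, "labels": ["l1"]}
        A "timestamp_ns" entry, when present, takes precedence over "timestamp_micros".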
+ """ + # Bigtable backend will use microseconds for timestamps, + # but the Python library prefers nanoseconds where possible + timestamp = cell_dict.get( + "timestamp_ns", cell_dict.get("timestamp_micros", -1) * 1000 + ) + if timestamp < 0: + raise ValueError("invalid timestamp") + cell_obj = CellResponse( + cell_dict["value"], + row_key, + family, + qualifier, + timestamp, + cell_dict.get("labels", None), + ) + return cell_obj def __int__(self) -> int: """ @@ -114,6 +341,24 @@ def __int__(self) -> int: """ return int.from_bytes(self.value, byteorder="big", signed=True) + def to_dict(self, use_nanoseconds=False) -> dict[str, Any]: + """ + Returns a dictionary representation of the cell in the Bigtable Cell + proto format + + https://cloud.google.com/bigtable/docs/reference/data/rpc/google.bigtable.v2#cell + """ + cell_dict: dict[str, Any] = { + "value": self.value, + } + if use_nanoseconds: + cell_dict["timestamp_ns"] = self.timestamp_ns + else: + cell_dict["timestamp_micros"] = self.timestamp_ns // 1000 + if self.labels: + cell_dict["labels"] = self.labels + return cell_dict + def __str__(self) -> str: """ Allows casting cell to str @@ -121,10 +366,54 @@ def __str__(self) -> str: """ return str(self.value) + def __repr__(self): + return f"CellResponse(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_ns={self.timestamp_ns}, labels={self.labels})" + """For Bigtable native ordering""" def __lt__(self, other) -> bool: - raise NotImplementedError + if not isinstance(other, CellResponse): + return NotImplemented + this_ordering = ( + self.family, + self.column_qualifier, + -self.timestamp_ns, + self.value, + self.labels, + ) + other_ordering = ( + other.family, + other.column_qualifier, + -other.timestamp_ns, + other.value, + other.labels, + ) + return this_ordering < other_ordering def __eq__(self, other) -> bool: - raise NotImplementedError + if not isinstance(other, CellResponse): + return NotImplemented + return ( + self.row_key == other.row_key + and self.family == other.family + and self.column_qualifier == other.column_qualifier + and self.value == other.value + and self.timestamp_ns == other.timestamp_ns + and len(self.labels) == len(other.labels) + and all([label in other.labels for label in self.labels]) + ) + + def __ne__(self, other) -> bool: + return not self == other + + def __hash__(self): + return hash( + ( + self.row_key, + self.family, + self.column_qualifier, + self.value, + self.timestamp_ns, + tuple(self.labels), + ) + ) From a40c00c0fcedc78ac3effd69d5511e860b686e97 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 09:49:03 -0700 Subject: [PATCH 051/349] added tests for row and cell response --- tests/unit/test_row_response.py | 786 ++++++++++++++++++++++++++++++++ 1 file changed, 786 insertions(+) create mode 100644 tests/unit/test_row_response.py diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row_response.py new file mode 100644 index 000000000..de46902e5 --- /dev/null +++ b/tests/unit/test_row_response.py @@ -0,0 +1,786 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import time + +TEST_VALUE = b"1234" +TEST_ROW_KEY = b"row" +TEST_FAMILY_ID = "cf1" +TEST_QUALIFIER = b"col" +TEST_TIMESTAMP = time.time_ns() +TEST_LABELS = ["label1", "label2"] + + +class TestRowResponse(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.row_response import RowResponse + + return RowResponse + + def _make_one(self, *args, **kwargs): + if len(args) == 0: + args = (TEST_ROW_KEY, [self._make_cell()]) + return self._get_target_class()(*args, **kwargs) + + def _make_cell( + self, + value=TEST_VALUE, + row_key=TEST_ROW_KEY, + family_id=TEST_FAMILY_ID, + qualifier=TEST_QUALIFIER, + timestamp=TEST_TIMESTAMP, + labels=TEST_LABELS, + ): + from google.cloud.bigtable.row_response import CellResponse + + return CellResponse(value, row_key, family_id, qualifier, timestamp, labels) + + def test_ctor(self): + cells = [self._make_cell(), self._make_cell()] + row_response = self._make_one(TEST_ROW_KEY, cells) + self.assertEqual(list(row_response), cells) + self.assertEqual(row_response.row_key, TEST_ROW_KEY) + + def test_ctor_dict(self): + cells = { + (TEST_FAMILY_ID, TEST_QUALIFIER): [ + self._make_cell().to_dict(), + self._make_cell().to_dict(use_nanoseconds=True), + ] + } + row_response = self._make_one(TEST_ROW_KEY, cells) + self.assertEqual(row_response.row_key, TEST_ROW_KEY) + self.assertEqual(len(row_response), 2) + for i in range(2): + self.assertEqual(row_response[i].value, TEST_VALUE) + self.assertEqual(row_response[i].row_key, TEST_ROW_KEY) + self.assertEqual(row_response[i].family, TEST_FAMILY_ID) + self.assertEqual(row_response[i].column_qualifier, TEST_QUALIFIER) + self.assertEqual(row_response[i].labels, TEST_LABELS) + self.assertEqual(row_response[0].timestamp_ns, TEST_TIMESTAMP) + # second cell was initialized with use_nanoseconds=False, so it doesn't have full precision + self.assertEqual(row_response[1].timestamp_ns, TEST_TIMESTAMP // 1000 * 1000) + + def test_ctor_bad_cell(self): + cells = [self._make_cell(), self._make_cell()] + cells[1].row_key = b"other" + with self.assertRaises(ValueError): + self._make_one(TEST_ROW_KEY, cells) + + def test_cell_order(self): + # cells should be ordered on init + cell1 = self._make_cell(value=b"1") + cell2 = self._make_cell(value=b"2") + resp = self._make_one(TEST_ROW_KEY, [cell2, cell1]) + output = list(resp) + self.assertEqual(output, [cell1, cell2]) + + def test_get_cells(self): + cell_list = [] + for family_id in ["1", "2"]: + for qualifier in [b"a", b"b"]: + cell = self._make_cell(family_id=family_id, qualifier=qualifier) + cell_list.append(cell) + # test getting all cells + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(row_response.get_cells(), cell_list) + # test getting cells in a family + output = row_response.get_cells(family="1") + self.assertEqual(len(output), 2) + self.assertEqual(output[0].family, "1") + self.assertEqual(output[1].family, "1") + self.assertEqual(output[0], cell_list[0]) + # test getting cells in a family/qualifier + # should accept bytes or str for qualifier + for q in [b"a", "a"]: + output = 
row_response.get_cells(family="1", qualifier=q) + self.assertEqual(len(output), 1) + self.assertEqual(output[0].family, "1") + self.assertEqual(output[0].column_qualifier, b"a") + self.assertEqual(output[0], cell_list[0]) + # calling with just qualifier should raise an error + with self.assertRaises(ValueError): + row_response.get_cells(qualifier=b"a") + # test calling with bad family or qualifier + with self.assertRaises(ValueError): + row_response.get_cells(family="3", qualifier=b"a") + with self.assertRaises(ValueError): + row_response.get_cells(family="3") + with self.assertRaises(ValueError): + row_response.get_cells(family="1", qualifier=b"c") + + def test__repr__(self): + from google.cloud.bigtable.row_response import CellResponse + from google.cloud.bigtable.row_response import RowResponse + + cell_str = ( + "{'value': b'1234', 'timestamp_ns': %d, 'labels': ['label1', 'label2']}" + % (TEST_TIMESTAMP) + ) + expected_prefix = "RowResponse(key=b'row', cells=" + row = self._make_one(TEST_ROW_KEY, [self._make_cell()]) + self.assertIn(expected_prefix, repr(row)) + self.assertIn(cell_str, repr(row)) + expected_full = ( + "RowResponse(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_ns': %d, 'labels': ['label1', 'label2']}],\n})" + % (TEST_TIMESTAMP) + ) + self.assertEqual(expected_full, repr(row)) + # should be able to construct instance from __repr__ + result = eval(repr(row)) + self.assertEqual(result, row) + self.assertIsInstance(result, RowResponse) + self.assertIsInstance(result[0], CellResponse) + # try with multiple cells + row = self._make_one(TEST_ROW_KEY, [self._make_cell(), self._make_cell()]) + self.assertIn(expected_prefix, repr(row)) + self.assertIn(cell_str, repr(row)) + # should be able to construct instance from __repr__ + result = eval(repr(row)) + self.assertEqual(result, row) + self.assertIsInstance(result, RowResponse) + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], CellResponse) + self.assertIsInstance(result[1], CellResponse) + + def test___str__(self): + cells = { + ("3", TEST_QUALIFIER): [ + self._make_cell().to_dict(), + self._make_cell().to_dict(), + self._make_cell().to_dict(), + ] + } + cells[("1", TEST_QUALIFIER)] = [self._make_cell().to_dict()] + + row_response = self._make_one(TEST_ROW_KEY, cells) + expected = ( + "{\n" + + " (family='1', qualifier=b'col'): [b'1234'],\n" + + " (family='3', qualifier=b'col'): [b'1234', (+2 more)],\n" + + "}" + ) + self.assertEqual(expected, str(row_response)) + + def test_to_dict(self): + from google.cloud.bigtable_v2.types import Row + + cell1 = self._make_cell() + cell2 = self._make_cell() + cell2.value = b"other" + row = self._make_one(TEST_ROW_KEY, [cell1, cell2]) + row_dict = row.to_dict() + expected_dict = { + "key": TEST_ROW_KEY, + "families": [ + { + "name": TEST_FAMILY_ID, + "columns": [ + { + "qualifier": TEST_QUALIFIER, + "cells": [ + { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP // 1000, + "labels": TEST_LABELS, + }, + { + "value": b"other", + "timestamp_micros": TEST_TIMESTAMP // 1000, + "labels": TEST_LABELS, + }, + ], + } + ], + }, + ], + } + self.assertEqual(len(row_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(row_dict[key], value) + # should be able to construct a Cell proto from the dict + row_proto = Row(**row_dict) + self.assertEqual(row_proto.key, TEST_ROW_KEY) + self.assertEqual(len(row_proto.families), 1) + family = row_proto.families[0] + self.assertEqual(family.name, TEST_FAMILY_ID) + 
self.assertEqual(len(family.columns), 1) + column = family.columns[0] + self.assertEqual(column.qualifier, TEST_QUALIFIER) + self.assertEqual(len(column.cells), 2) + self.assertEqual(column.cells[0].value, TEST_VALUE) + self.assertEqual(column.cells[0].timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(column.cells[0].labels, TEST_LABELS) + self.assertEqual(column.cells[1].value, cell2.value) + self.assertEqual(column.cells[1].timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(column.cells[1].labels, TEST_LABELS) + + def test_iteration(self): + from types import GeneratorType + from google.cloud.bigtable.row_response import CellResponse + + # should be able to iterate over the RowResponse as a list + cell3 = self._make_cell(value=b"3") + cell1 = self._make_cell(value=b"1") + cell2 = self._make_cell(value=b"2") + row_response = self._make_one(TEST_ROW_KEY, [cell3, cell1, cell2]) + self.assertEqual(len(row_response), 3) + # should create generator object + self.assertIsInstance(iter(row_response), GeneratorType) + result_list = list(row_response) + self.assertEqual(len(result_list), 3) + # should be able to iterate over all cells + idx = 0 + for cell in row_response: + self.assertIsInstance(cell, CellResponse) + self.assertEqual(cell.value, result_list[idx].value) + self.assertEqual(cell.value, str(idx + 1).encode()) + idx += 1 + + def test_contains_cell(self): + cell3 = self._make_cell(value=b"3") + cell1 = self._make_cell(value=b"1") + cell2 = self._make_cell(value=b"2") + cell4 = self._make_cell(value=b"4") + row_response = self._make_one(TEST_ROW_KEY, [cell3, cell1, cell2]) + self.assertIn(cell1, row_response) + self.assertIn(cell2, row_response) + self.assertNotIn(cell4, row_response) + cell3_copy = self._make_cell(value=b"3") + self.assertIn(cell3_copy, row_response) + + def test_contains_family_id(self): + new_family_id = "new_family_id" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2]) + self.assertIn(TEST_FAMILY_ID, row_response) + self.assertIn("new_family_id", row_response) + self.assertIn(new_family_id, row_response) + self.assertNotIn("not_a_family_id", row_response) + self.assertNotIn(None, row_response) + + def test_contains_family_qualifier_tuple(self): + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2]) + self.assertIn((TEST_FAMILY_ID, TEST_QUALIFIER), row_response) + self.assertIn(("new_family_id", "new_qualifier"), row_response) + self.assertIn(("new_family_id", b"new_qualifier"), row_response) + self.assertIn((new_family_id, new_qualifier), row_response) + + self.assertNotIn(("not_a_family_id", TEST_QUALIFIER), row_response) + self.assertNotIn((TEST_FAMILY_ID, "not_a_qualifier"), row_response) + self.assertNotIn((TEST_FAMILY_ID, new_qualifier), row_response) + self.assertNotIn(("not_a_family_id", "not_a_qualifier"), row_response) + self.assertNotIn((None, None), row_response) + self.assertNotIn(None, row_response) + + def test_int_indexing(self): + # should be 
able to index into underlying list with an index number directly + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(len(row_response), 10) + for i in range(10): + self.assertEqual(row_response[i].value, str(i).encode()) + # backwards indexing should work + self.assertEqual(row_response[-i - 1].value, str(9 - i).encode()) + with self.assertRaises(IndexError): + row_response[10] + with self.assertRaises(IndexError): + row_response[-11] + + def test_slice_indexing(self): + # should be able to index with a range of indices + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(len(row_response), 10) + self.assertEqual(len(row_response[0:10]), 10) + self.assertEqual(row_response[0:10], cell_list) + self.assertEqual(len(row_response[0:]), 10) + self.assertEqual(row_response[0:], cell_list) + self.assertEqual(len(row_response[:10]), 10) + self.assertEqual(row_response[:10], cell_list) + self.assertEqual(len(row_response[0:10:1]), 10) + self.assertEqual(row_response[0:10:1], cell_list) + self.assertEqual(len(row_response[0:10:2]), 5) + self.assertEqual(row_response[0:10:2], [cell_list[i] for i in range(0, 10, 2)]) + self.assertEqual(len(row_response[0:10:3]), 4) + self.assertEqual(row_response[0:10:3], [cell_list[i] for i in range(0, 10, 3)]) + self.assertEqual(len(row_response[10:0:-1]), 9) + self.assertEqual(len(row_response[10:0:-2]), 5) + self.assertEqual(row_response[10:0:-3], cell_list[10:0:-3]) + self.assertEqual(len(row_response[0:100]), 10) + + def test_family_indexing(self): + # should be able to retrieve cells in a family + new_family_id = "new_family_id" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response[TEST_FAMILY_ID]), 2) + self.assertEqual(row_response[TEST_FAMILY_ID][0], cell) + self.assertEqual(row_response[TEST_FAMILY_ID][1], cell2) + self.assertEqual(len(row_response[new_family_id]), 1) + self.assertEqual(row_response[new_family_id][0], cell3) + with self.assertRaises(ValueError): + row_response["not_a_family_id"] + with self.assertRaises(TypeError): + row_response[None] + with self.assertRaises(TypeError): + row_response[b"new_family_id"] + + def test_family_qualifier_indexing(self): + # should be able to retrieve cells in a family/qualifier tuplw + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response[TEST_FAMILY_ID, TEST_QUALIFIER]), 2) + self.assertEqual(row_response[TEST_FAMILY_ID, TEST_QUALIFIER][0], cell) + 
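+        # editorial note (not in the original test): the str/bytes checks below
+        # are expected to be equivalent because get_cells() UTF-8-encodes any
+        # str qualifier before looking it up.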
self.assertEqual(row_response[TEST_FAMILY_ID, TEST_QUALIFIER][1], cell2) + self.assertEqual(len(row_response[new_family_id, new_qualifier]), 1) + self.assertEqual(row_response[new_family_id, new_qualifier][0], cell3) + self.assertEqual(len(row_response["new_family_id", "new_qualifier"]), 1) + self.assertEqual(len(row_response["new_family_id", b"new_qualifier"]), 1) + with self.assertRaises(ValueError): + row_response[new_family_id, "not_a_qualifier"] + with self.assertRaises(ValueError): + row_response["not_a_family_id", new_qualifier] + with self.assertRaises(TypeError): + row_response[None, None] + with self.assertRaises(TypeError): + row_response[b"new_family_id", b"new_qualifier"] + + def test_keys(self): + # should be able to retrieve (family,qualifier) tuples as keys + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response.keys()), 2) + self.assertEqual( + row_response.keys(), + [(TEST_FAMILY_ID, TEST_QUALIFIER), (new_family_id, new_qualifier)], + ) + + row_response = self._make_one(TEST_ROW_KEY, []) + self.assertEqual(len(row_response.keys()), 0) + self.assertEqual(row_response.keys(), []) + + row_response = self._make_one(TEST_ROW_KEY, [cell]) + self.assertEqual(len(row_response.keys()), 1) + self.assertEqual(row_response.keys(), [(TEST_FAMILY_ID, TEST_QUALIFIER)]) + + def test_values(self): + # values should return the list of all cells + cell_list = [self._make_cell(qualifier=str(i).encode()) for i in range(10)] + row_response = self._make_one(TEST_ROW_KEY, cell_list) + sorted(cell_list) + + self.assertEqual(len(row_response.values()), 10) + self.assertEqual(row_response.values(), cell_list) + + def test_items(self): + cell_list = [self._make_cell() for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + + self.assertEqual(len(list(row_response.items())), 1) + self.assertEqual( + list(row_response.items())[0][0], (TEST_FAMILY_ID, TEST_QUALIFIER) + ) + self.assertEqual(list(row_response.items())[0][1], cell_list) + + row_response = self._make_one(TEST_ROW_KEY, []) + self.assertEqual(len(list(row_response.items())), 0) + + cell_list = [self._make_cell(qualifier=str(i).encode()) for i in range(10)] + row_response = self._make_one(TEST_ROW_KEY, cell_list) + sorted(cell_list) + self.assertEqual(len(list(row_response.items())), 10) + keys = [t[0] for t in row_response.items()] + cells = [t[1] for t in row_response.items()] + for i in range(10): + self.assertEqual(keys[i], (TEST_FAMILY_ID, str(i).encode())) + self.assertEqual(len(cells[i]), 1) + self.assertEqual(cells[i][0], cell_list[i]) + + def test_index_of(self): + # given a cell, should find index in underlying list + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + + self.assertEqual(row_response.index(cell_list[0]), 0) + self.assertEqual(row_response.index(cell_list[5]), 5) + self.assertEqual(row_response.index(cell_list[9]), 9) + with self.assertRaises(ValueError): + 
row_response.index(self._make_cell()) + with self.assertRaises(ValueError): + row_response.index(None) + + +class TestCellResponse(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.row_response import CellResponse + + return CellResponse + + def _make_one(self, *args, **kwargs): + if len(args) == 0: + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + return self._get_target_class()(*args, **kwargs) + + def test_ctor(self): + cell = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(cell.value, TEST_VALUE) + self.assertEqual(cell.row_key, TEST_ROW_KEY) + self.assertEqual(cell.family, TEST_FAMILY_ID) + self.assertEqual(cell.column_qualifier, TEST_QUALIFIER) + self.assertEqual(cell.timestamp_ns, TEST_TIMESTAMP) + self.assertEqual(cell.labels, TEST_LABELS) + + def test_to_dict(self): + from google.cloud.bigtable_v2.types import Cell + + cell = self._make_one() + cell_dict = cell.to_dict() + expected_dict = { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP // 1000, + "labels": TEST_LABELS, + } + self.assertEqual(len(cell_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(cell_dict[key], value) + # should be able to construct a Cell proto from the dict + cell_proto = Cell(**cell_dict) + self.assertEqual(cell_proto.value, TEST_VALUE) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(cell_proto.labels, TEST_LABELS) + + def test_to_dict_nanos_timestamp(self): + cell = self._make_one() + cell_dict = cell.to_dict(use_nanoseconds=True) + expected_dict = { + "value": TEST_VALUE, + "timestamp_ns": TEST_TIMESTAMP, + "labels": TEST_LABELS, + } + self.assertEqual(len(cell_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(cell_dict[key], value) + + def test_to_dict_no_labels(self): + from google.cloud.bigtable_v2.types import Cell + + cell_no_labels = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + None, + ) + cell_dict = cell_no_labels.to_dict() + expected_dict = { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP // 1000, + } + self.assertEqual(len(cell_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(cell_dict[key], value) + # should be able to construct a Cell proto from the dict + cell_proto = Cell(**cell_dict) + self.assertEqual(cell_proto.value, TEST_VALUE) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(cell_proto.labels, []) + + def test_int_value(self): + test_int = 1234 + bytes_value = test_int.to_bytes(4, "big", signed=True) + cell = self._make_one( + bytes_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(int(cell), test_int) + # ensure string formatting works + formatted = "%d" % cell + self.assertEqual(formatted, str(test_int)) + self.assertEqual(int(formatted), test_int) + + def test_int_value_negative(self): + test_int = -99999 + bytes_value = test_int.to_bytes(4, "big", signed=True) + cell = self._make_one( + bytes_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(int(cell), test_int) + # ensure string formatting works + formatted = "%d" % cell + self.assertEqual(formatted, str(test_int)) + 
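+        # editorial note (not in the original test): __int__ decodes the cell
+        # value as big-endian signed bytes, so for example
+        #     int.from_bytes((-99999).to_bytes(4, "big", signed=True), "big", signed=True)
+        # round-trips back to -99999.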
self.assertEqual(int(formatted), test_int) + + def test___str__(self): + test_value = b"helloworld" + cell = self._make_one( + test_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(str(cell), "b'helloworld'") + self.assertEqual(str(cell), str(test_value)) + + def test___repr__(self): + from google.cloud.bigtable.row_response import CellResponse # type: ignore # noqa: F401 + + cell = self._make_one() + expected = ( + "CellResponse(value=b'1234', row=b'row', " + + "family='cf1', column_qualifier=b'col', " + + f"timestamp_ns={TEST_TIMESTAMP}, labels=['label1', 'label2'])" + ) + self.assertEqual(repr(cell), expected) + # should be able to construct instance from __repr__ + result = eval(repr(cell)) + self.assertEqual(result, cell) + + def test___repr___no_labels(self): + from google.cloud.bigtable.row_response import CellResponse # type: ignore # noqa: F401 + + cell_no_labels = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + None, + ) + expected = ( + "CellResponse(value=b'1234', row=b'row', " + + "family='cf1', column_qualifier=b'col', " + + f"timestamp_ns={TEST_TIMESTAMP}, labels=[])" + ) + self.assertEqual(repr(cell_no_labels), expected) + # should be able to construct instance from __repr__ + result = eval(repr(cell_no_labels)) + self.assertEqual(result, cell_no_labels) + + def test_equality(self): + cell1 = self._make_one() + cell2 = self._make_one() + self.assertEqual(cell1, cell2) + self.assertTrue(cell1 == cell2) + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + for i in range(0, len(args)): + # try changing each argument + modified_cell = self._make_one(*args[:i], args[i] + args[i], *args[i + 1 :]) + self.assertNotEqual(cell1, modified_cell) + self.assertFalse(cell1 == modified_cell) + self.assertTrue(cell1 != modified_cell) + + def test_hash(self): + # class should be hashable + cell1 = self._make_one() + d = {cell1: 1} + cell2 = self._make_one() + self.assertEqual(d[cell2], 1) + + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + for i in range(0, len(args)): + # try changing each argument + modified_cell = self._make_one(*args[:i], args[i] + args[i], *args[i + 1 :]) + with self.assertRaises(KeyError): + d[modified_cell] + + def test_ordering(self): + # create cell list in order from lowest to highest + higher_cells = [] + i = 0 + # families; alphebetical order + for family in ["z", "y", "x"]: + # qualifiers; lowest byte value first + for qualifier in [b"z", b"y", b"x"]: + # timestamps; newest first + for timestamp in [ + TEST_TIMESTAMP, + TEST_TIMESTAMP + 1, + TEST_TIMESTAMP + 2, + ]: + cell = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + family, + qualifier, + timestamp, + TEST_LABELS, + ) + # cell should be the highest priority encountered so far + self.assertEqual(i, len(higher_cells)) + i += 1 + for other in higher_cells: + self.assertLess(cell, other) + higher_cells.append(cell) + # final order should be reverse of sorted order + expected_order = higher_cells + expected_order.reverse() + self.assertEqual(expected_order, sorted(higher_cells)) From 2dbd4ad0539fd2960e976ed3d40364acb7dac467 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 10:59:08 -0700 Subject: [PATCH 052/349] removed explicit Mapping inheritance relationship --- google/cloud/bigtable/row_response.py | 11 +++-------- 1 file changed, 3 
insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index af714e147..cc55dd870 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -15,7 +15,7 @@ from __future__ import annotations from collections import OrderedDict -from typing import Sequence, Generator, Mapping, overload, Union, List, Tuple, Any +from typing import Sequence, Generator, overload, Any from functools import total_ordering # Type aliases used internally for readability. @@ -25,12 +25,7 @@ row_value = bytes -class RowResponse( - Sequence["CellResponse"], - Mapping[ - Union[family_id, Tuple[family_id, Union[qualifier, str]]], List["CellResponse"] - ], -): +class RowResponse(Sequence["CellResponse"]): """ Model class for row data returned from server @@ -203,7 +198,7 @@ def __contains__(self, item): def __getitem__( self, index: family_id | tuple[family_id, qualifier | str], - ) -> List[CellResponse]: + ) -> list[CellResponse]: # overload signature for type checking pass From 58024b0af9441e49decb79a1aaa63006990a7922 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 11:12:29 -0700 Subject: [PATCH 053/349] improved comments --- google/cloud/bigtable/row_response.py | 60 ++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index cc55dd870..ded90f0d3 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -43,7 +43,12 @@ def __init__( cells: list[CellResponse] | dict[tuple[family_id, qualifier], list[dict[str, Any]]], ): - """Expected to be used internally only""" + """ + Initializes a RowResponse object + + RowResponse objects are not intended to be created by users. + They are returned by the Bigtable backend. 
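+
+        For reference, `cells` may be either a list of CellResponse objects or
+        a mapping of `(family, qualifier)` pairs to lists of cell dicts, as in
+        the annotated signature above.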
+ """ self.row_key = key self._cells_map: dict[ family_id, dict[qualifier, list[CellResponse]] @@ -175,11 +180,20 @@ def to_dict(self) -> dict[str, Any]: # Sequence and Mapping methods def __iter__(self): + """ + Allow iterating over all cells in the row + """ # iterate as a sequence; yield all cells for cell in self._cells_list: yield cell def __contains__(self, item): + """ + Implements `in` operator + + Works for both cells in the internal list, and `family` or + `(family, qualifier)` pairs associated with the cells + """ if isinstance(item, family_id): # check if family key is in RowResponse return item in self._cells_map @@ -213,6 +227,12 @@ def __getitem__(self, index: slice) -> list[CellResponse]: pass def __getitem__(self, index): + """ + Implements [] indexing + + Supports indexing by family, (family, qualifier) pair, + numerical index, and index slicing + """ if isinstance(index, family_id): return self.get_cells(family=index) elif ( @@ -230,9 +250,17 @@ def __getitem__(self, index): ) def __len__(self): + """ + Implements `len()` operator + """ return len(self._cells_list) def keys(self): + """ + Returns a list of (family, qualifier) pairs associated with the cells + + Pairs can be used for indexing + """ key_list = [] for family in self._cells_map: for qualifier in self._cells_map[family]: @@ -240,13 +268,22 @@ def keys(self): return key_list def values(self): + """ + Returns the list of cells in the row + """ return self._cells_list def items(self): + """ + Iterates over (family, qualifier) pairs and the list of associated cells + """ for key in self.keys(): yield key, self[key] def __eq__(self, other): + """ + Implements `==` operator + """ # for performance reasons, check row metadata # before checking individual cells if not isinstance(other, RowResponse): @@ -266,6 +303,12 @@ def __eq__(self, other): return False return True + def __ne__(self, other) -> bool: + """ + Implements `!=` operator + """ + return not self == other + @total_ordering class CellResponse: @@ -362,11 +405,17 @@ def __str__(self) -> str: return str(self.value) def __repr__(self): + """ + Returns a string representation of the cell + """ return f"CellResponse(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_ns={self.timestamp_ns}, labels={self.labels})" """For Bigtable native ordering""" def __lt__(self, other) -> bool: + """ + Implements `<` operator + """ if not isinstance(other, CellResponse): return NotImplemented this_ordering = ( @@ -386,6 +435,9 @@ def __lt__(self, other) -> bool: return this_ordering < other_ordering def __eq__(self, other) -> bool: + """ + Implements `==` operator + """ if not isinstance(other, CellResponse): return NotImplemented return ( @@ -399,9 +451,15 @@ def __eq__(self, other) -> bool: ) def __ne__(self, other) -> bool: + """ + Implements `!=` operator + """ return not self == other def __hash__(self): + """ + Implements `hash()` function to fingerprint cell + """ return hash( ( self.row_key, From d4cfd28cd4719d1233db8e9c657afeb6e7ca4750 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 11:28:20 -0700 Subject: [PATCH 054/349] fixed values implementation --- google/cloud/bigtable/row_response.py | 8 ++++++-- tests/unit/test_row_response.py | 15 +++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index ded90f0d3..2262e5a5b 100644 --- a/google/cloud/bigtable/row_response.py +++ 
b/google/cloud/bigtable/row_response.py @@ -269,9 +269,13 @@ def keys(self): def values(self): """ - Returns the list of cells in the row + Returns the the cells in the row, broken into lists + corresponding to the family and qualifier """ - return self._cells_list + result = [] + for key in self.keys(): + result.append(self[key]) + return result def items(self): """ diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row_response.py index de46902e5..638292d2d 100644 --- a/tests/unit/test_row_response.py +++ b/tests/unit/test_row_response.py @@ -490,13 +490,20 @@ def test_keys(self): self.assertEqual(row_response.keys(), [(TEST_FAMILY_ID, TEST_QUALIFIER)]) def test_values(self): - # values should return the list of all cells - cell_list = [self._make_cell(qualifier=str(i).encode()) for i in range(10)] + # values should return the all cells, divided into lists + # according to (family,qualifier) pairs + cell_list = [self._make_cell(qualifier=str(i % 5).encode()) for i in range(10)] row_response = self._make_one(TEST_ROW_KEY, cell_list) sorted(cell_list) - self.assertEqual(len(row_response.values()), 10) - self.assertEqual(row_response.values(), cell_list) + values = list(row_response.values()) + self.assertEqual(len(values), 5) + self.assertEqual(len(values[0]), 2) + + keys = list(row_response.keys()) + values = list(row_response.values()) + for i in range(len(keys)): + self.assertEqual(row_response[keys[i]], values[i]) def test_items(self): cell_list = [self._make_cell() for i in range(10)] From cbe706298ce61be86636b5d95f43490f95031c8d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 11:40:28 -0700 Subject: [PATCH 055/349] moved instance tracking back into Table --- google/cloud/bigtable/client.py | 64 +++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 9faab8010..81dac8ad1 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -45,7 +45,6 @@ class BigtableDataClient(ClientWithProject): def __init__( self, - instance_id: str, *, project: str | None = None, pool_size: int = 3, @@ -59,8 +58,6 @@ def __init__( Create a client instance for the Bigtable Data API Args: - instance_id: The Bigram instance ID to associate with this client - instance_id is combined with project to fully specify the instance project: the project which the client acts on behalf of. If not passed, falls back to the default inferred from the environment. 
@@ -94,7 +91,8 @@ def __init__( self.transport: PooledBigtableGrpcAsyncIOTransport = cast( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) - self.instance_id = instance_id + # keep track of active instances to for warmup on channel refresh + self._active_instances = set() # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] @@ -120,9 +118,9 @@ def start_background_channel_refresh(self) -> None: refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) self._channel_refresh_tasks.append(refresh_task) - async def _ping_and_warm_instance( + async def _ping_and_warm_instances( self, channel: grpc.aio.Channel - ) -> Exception | None: + ) -> list[Exception | None]: """ Prepares the backend for requests on a channel @@ -136,12 +134,8 @@ async def _ping_and_warm_instance( ping_rpc = channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarmChannel" ) - try: - return await ping_rpc( - {"name": f"projects/{self.project}/instances/{self.instance_id}"} - ) - except GoogleAPICallError as e: - return e + tasks = [ping_rpc({"name": n}) for n in self._active_instances] + return await asyncio.gather(*tasks, return_exceptions=True) async def _manage_channel( self, @@ -168,7 +162,7 @@ async def _manage_channel( if next_sleep > 0: # warm the current channel immediately channel = self.transport.channel_pool[channel_idx] - await self._ping_and_warm_instance(channel) + await self._ping_and_warm_instances(channel) # continuously refresh the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) @@ -192,8 +186,37 @@ async def _manage_channel( # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) - def get_table( + async def register_instance(self, instance_id: str): + """ + Registers an instance with the client, and warms the channel pool + for the instance + The client will periodically refresh grpc channel pool used to make + requests, and new channels will be warmed for each registered instance + Channels will not be refreshed unless at least one instance is registered + """ + instance_name = self._gapic_client.instance_path(self.project, instance_id) + self._active_instances.add(instance_name) + if self._channel_refresh_tasks: + # refresh tasks already running + # call ping and warm on all existing channels + for channel in self.transport.channel_pool: + await self._ping_and_warm_instances(channel) + else: + # refresh tasks aren't active. start them as background tasks + self.start_background_channel_refresh() + + + async def remove_instance_registration(self, instance_id: str): + """ + Removes an instance from the client's registered instances, to prevent + warming new channels for the instance + """ + instance_name = self._gapic_client.instance_path(self.project, instance_id) + self._active_instances.remove(instance_name) + + async def get_table( self, + instance_id: str, table_id: str, app_profile_id: str | None = None, ) -> Table: @@ -201,11 +224,14 @@ def get_table( Returns a table instance for making data API requests Args: + instance_id: The Bigtable instance ID to associate with this client + instance_id is combined with the client's project to fully + specify the instance table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles """ - # ensure channel refresh tasks have started + await self.register_instance(instance_id) return Table(self, table_id, app_profile_id) @@ -220,10 +246,18 @@ class Table: def __init__( self, client: BigtableDataClient, + instance_id: str, table_id: str, app_profile_id: str | None = None, ): + """ + Initialize a Table instance + + Tables are not meant to be instantiated directly, but are returned by + `BigtableDataClient.get_table` + """ self.client = client + self.instance = instance_id self.table_id = table_id self.app_profile_id = app_profile_id From 909f889443927936a69a2c1d3cd44940c45b933d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 11:43:49 -0700 Subject: [PATCH 056/349] fixed lint issues --- google/cloud/bigtable/client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 81dac8ad1..55a689b57 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import cast, Any, AsyncIterable, Optional, TYPE_CHECKING +from typing import cast, Any, AsyncIterable, Optional, Set, TYPE_CHECKING import asyncio import grpc @@ -92,7 +92,7 @@ def __init__( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) # keep track of active instances to for warmup on channel refresh - self._active_instances = set() + self._active_instances: Set[str] = set() # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] @@ -120,7 +120,7 @@ def start_background_channel_refresh(self) -> None: async def _ping_and_warm_instances( self, channel: grpc.aio.Channel - ) -> list[Exception | None]: + ) -> list[GoogleAPICallError | None]: """ Prepares the backend for requests on a channel @@ -179,7 +179,7 @@ async def _manage_channel( ("grpc.max_receive_message_length", -1), ], ) - await self._ping_and_warm_instance(channel) + await self._ping_and_warm_instances(channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() await self.transport.replace_channel(channel_idx, grace_period, new_channel) @@ -205,7 +205,6 @@ async def register_instance(self, instance_id: str): # refresh tasks aren't active. 
start them as background tasks self.start_background_channel_refresh() - async def remove_instance_registration(self, instance_id: str): """ Removes an instance from the client's registered instances, to prevent @@ -232,7 +231,7 @@ async def get_table( https://cloud.google.com/bigtable/docs/app-profiles """ await self.register_instance(instance_id) - return Table(self, table_id, app_profile_id) + return Table(self, instance_id, table_id, app_profile_id) class Table: From 5efa0ac83c04635cd4a59f7ad6e6505e2f54a4e9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 16 Mar 2023 11:46:14 -0700 Subject: [PATCH 057/349] fixed blacken --- tests/unit/gapic/bigtable_v2/test_bigtable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 3dffd6aec..143678ae5 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -2639,6 +2639,7 @@ def test_generate_initial_change_stream_partitions_field_headers(): with pytest.raises(NotImplementedError): getattr(transport, method)(request=object()) + @pytest.mark.asyncio async def test_generate_initial_change_stream_partitions_field_headers_async(): client = BigtableAsyncClient( From 2f5c73ab4400efe52133a927ca5482e68fc48b73 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 13:05:29 -0700 Subject: [PATCH 058/349] pulled in row_merger file from prototypes --- google/cloud/bigtable/row_merger.py | 413 ++++++++++++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 google/cloud/bigtable/row_merger.py diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py new file mode 100644 index 000000000..ad0e58864 --- /dev/null +++ b/google/cloud/bigtable/row_merger.py @@ -0,0 +1,413 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse +from google.cloud.bigtable.row import Row, DirectRow, InvalidChunk, PartialRowData, Cell +from google.protobuf.wrappers_pb2 import StringValue, BytesValue +from collections import deque, namedtuple +from datetime import datetime +import asyncio + +from typing import cast, Deque, Optional, List, Dict, Set, Any, AsyncIterable, AsyncGenerator, Awaitable + +class RowMerger: + def __init__(self): + self.state_machine: StateMachine = StateMachine() + self.cache: asyncio.Queue[PartialRowData] = asyncio.Queue() + + def push(self, new_data: ReadRowsResponse): + if not isinstance(new_data, ReadRowsResponse): + new_data = ReadRowsResponse(new_data) #type: ignore + last_scanned = new_data.last_scanned_row_key + # if the server sends a scan heartbeat, notify the state machine. + if last_scanned: + self.state_machine.handle_last_scanned_row(last_scanned) + if self.state_machine.has_complete_row(): + self.cache.put_nowait(self.state_machine.consume_row()) + # process new chunks through the state machine. 
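+        # editorial sketch (not part of the original commit): each heartbeat or
+        # chunk advances the state machine, and completed rows land in the
+        # cache, so a caller can drain results roughly like:
+        #     while merger.has_full_frame():
+        #         row = merger.pop()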
+ for chunk in new_data.chunks: + self.state_machine.handle_chunk(chunk) + if self.state_machine.has_complete_row(): + self.cache.put_nowait(self.state_machine.consume_row()) + + def has_full_frame(self) -> bool: + """ + Indicates whether there is a row ready to consume + """ + return not self.cache.empty() + + def has_partial_frame(self) -> bool: + """ + Returns true if the merger still has ongoing state + By the end of the process, there should be no partial state + """ + return self.state_machine.is_row_in_progress() + + def pop(self) -> PartialRowData: + """ + Return a row out of the cache of waiting rows + """ + return self.cache.get_nowait() + +class RowMergerIterator(RowMerger): + def __init__(self, request_generator:Awaitable[AsyncIterable[ReadRowsResponse]]): + super().__init__() + self.task = asyncio.create_task(self._consume_stream(request_generator)) + + def __aiter__(self): + # mark self as async iterator + return self + + async def __anext__(self): + # if there are waiting items, return one + if not self.cache.empty(): + return self.cache.get_nowait() + # no waiting tasks + # wait for either the task to finish, or a new item to enter the cache + get_from_cache = asyncio.create_task(self.cache.get()) + await asyncio.wait( + [self.task, get_from_cache], + return_when=asyncio.FIRST_COMPLETED) + # if a new item was put in the cache, return that + if get_from_cache.done(): + return get_from_cache.result() + # if the task was complete with an exception, raise the exception + elif self.task.done(): + if self.task.exception(): + raise cast(Exception, self.task.exception()) + else: + # task completed successfully + raise StopAsyncIteration + else: + raise RuntimeError("expected either new item or stream completion") + + async def _consume_stream(self, request_gen:AsyncIterable[ReadRowsResponse]): + """ + Coroutine to consume ReadRowsResponses from the backend, + run them through the state machine, and push them into the queue for later + consumption + """ + async for request in request_gen: + self.push(request) + if self.has_partial_frame(): + # read rows is complete, but there's still data in the merger + # TODO: change type + raise RuntimeError("read_rows completed with partial state remaining") + +class StateMachine: + def __init__(self): + self.completed_row_keys: Set[bytes] = set({}) + self.adapter: "RowBuilder" = RowBuilder() + self.reset() + + def reset(self): + self.current_state: Optional[State] = AWAITING_NEW_ROW(self) + self.last_cell_data: Dict[str, Any] = {} + # represents either the last row emitted, or the last_scanned_key sent from backend + # all future rows should have keys > last_seen_row_key + self.last_seen_row_key: Optional[bytes] = None + # self.expected_cell_size:int = 0 + # self.remaining_cell_bytes:int = 0 + self.complete_row: Optional[PartialRowData] = None + # self.num_cells_in_row:int = 0 + self.adapter.reset() + + def handle_last_scanned_row(self, last_scanned_row_key: bytes): + if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: + raise InvalidChunk("Last scanned row key is out of order") + self.last_scanned_row_key = last_scanned_row_key + assert isinstance(self.current_state, State) + self.current_state = self.current_state.handle_last_scanned_row( + last_scanned_row_key + ) + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk): + assert isinstance(self.current_state, State) + if chunk.row_key in self.completed_row_keys: + raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") + self.current_state = 
self.current_state.handle_chunk(chunk) + + def has_complete_row(self) -> bool: + return ( + isinstance(self.current_state, AWAITING_ROW_CONSUME) + and self.complete_row is not None + ) + + def consume_row(self) -> PartialRowData: + """ + Returns the last completed row and transitions to a new row + """ + if not self.has_complete_row() or self.complete_row is None: + raise RuntimeError("No row to consume") + row = self.complete_row + self.reset() + self.completed_row_keys.add(row.row_key) + return row + + def is_row_in_progress(self) -> bool: + return not isinstance(self.current_state, AWAITING_NEW_ROW) + + def handle_commit_row(self) -> "State": + """ + Called when a row is complete. + Wait in AWAITING_ROW_CONSUME state for the RowMerger to consume it + """ + self.complete_row = self.adapter.finish_row() + self.last_seen_row_key = self.complete_row.row_key + return AWAITING_ROW_CONSUME(self) + + def handle_reset_chunk( + self, chunk: ReadRowsResponse.CellChunk + ) -> "AWAITING_NEW_ROW": + """ + When a reset chunk comes in, drop all buffers and reset to AWAITING_NEW_ROW state + """ + # ensure reset chunk matches expectations + if isinstance(self.current_state, AWAITING_NEW_ROW): + raise InvalidChunk("Bare reset") + if chunk.row_key: + raise InvalidChunk("Reset chunk has a row key") + if "family_name" in chunk: + raise InvalidChunk("Reset chunk has family_name") + if "qualifier" in chunk: + raise InvalidChunk("Reset chunk has qualifier") + if chunk.timestamp_micros: + raise InvalidChunk("Reset chunk has a timestamp") + if chunk.labels: + raise InvalidChunk("Reset chunk has labels") + if chunk.value: + raise InvalidChunk("Reset chunk has a value") + self.reset() + assert isinstance(self.current_state, AWAITING_NEW_ROW) + return self.current_state + + +class State: + def __init__(self, owner: "StateMachine"): + self._owner = owner + + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + raise NotImplementedError + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + raise NotImplementedError + + +class AWAITING_NEW_ROW(State): + """ + Default state + Awaiting a chunk to start a new row + Exit states: any (depending on chunk) + """ + + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + self._owner.complete_row = self._owner.adapter.create_scan_marker_row( + last_scanned_row_key + ) + return AWAITING_ROW_CONSUME(self._owner) + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + if not chunk.row_key: + raise InvalidChunk("New row is missing a row key") + if ( + self._owner.last_seen_row_key + and self._owner.last_seen_row_key >= chunk.row_key + ): + raise InvalidChunk("Out of order row keys") + self._owner.adapter.start_row(chunk.row_key) + # the first chunk signals both the start of a new row and the start of a new cell, so + # force the chunk processing in the AWAITING_CELL_VALUE. + return AWAITING_NEW_CELL(self._owner).handle_chunk(chunk) + + +class AWAITING_NEW_CELL(State): + """ + Represents a cell boundary witin a row + Exit states: any (depending on chunk) + """ + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + if chunk.reset_row: + return self._owner.handle_reset_chunk(chunk) + chunk_size = len(chunk.value) + is_split = chunk.value_size > 0 + expected_cell_size = chunk.value_size if is_split else chunk_size + # track latest cell data. 
New chunks won't send repeated data + if chunk.family_name: + self._owner.last_cell_data["family"] = chunk.family_name + if not chunk.qualifier: + raise InvalidChunk("new column family must specify qualifier") + if chunk.qualifier: + self._owner.last_cell_data["qualifier"] = chunk.qualifier + if not self._owner.last_cell_data.get("family", False): + raise InvalidChunk("family not found") + self._owner.last_cell_data["labels"] = chunk.labels + self._owner.last_cell_data["timestamp"] = chunk.timestamp_micros + + # ensure that all chunks after the first one either are missing a row + # key or the row is the same + if ( + self._owner.adapter.row_in_progress() + and chunk.row_key + and chunk.row_key != self._owner.adapter.current_key + ): + raise InvalidChunk("row key changed mid row") + + self._owner.adapter.start_cell( + **self._owner.last_cell_data, size=expected_cell_size + ) + self._owner.adapter.cell_value(chunk.value) + # transition to new state + if is_split: + return AWAITING_CELL_VALUE(self._owner) + else: + # cell is complete + self._owner.adapter.finish_cell() + if chunk.commit_row: + # row is also complete + return self._owner.handle_commit_row() + else: + # wait for more cells for this row + return AWAITING_NEW_CELL(self._owner) + + +class AWAITING_CELL_VALUE(State): + """ + State that represents a split cell's continuation + Exit states: any (depending on chunk) + """ + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + # ensure reset chunk matches expectations + if chunk.row_key: + raise InvalidChunk("found row key mid cell") + if "family_name" in chunk: + raise InvalidChunk("In progress cell had a family name") + if "qualifier" in chunk: + raise InvalidChunk("In progress cell had a qualifier") + if chunk.timestamp_micros: + raise InvalidChunk("In progress cell had a timestamp") + if chunk.labels: + raise InvalidChunk("In progress cell had labels") + # check for reset row + if chunk.reset_row: + return self._owner.handle_reset_chunk(chunk) + is_last = chunk.value_size == 0 + self._owner.adapter.cell_value(chunk.value) + # transition to new state + if not is_last: + return AWAITING_CELL_VALUE(self._owner) + else: + # cell is complete + self._owner.adapter.finish_cell() + if chunk.commit_row: + # row is also complete + return self._owner.handle_commit_row() + else: + # wait for more cells for this row + return AWAITING_NEW_CELL(self._owner) + + +class AWAITING_ROW_CONSUME(State): + """ + Represents a completed row. Prevents new rows being read until it is consumed + Exit states: AWAITING_NEW_ROW + """ + + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + raise RuntimeError("Skipping completed row") + + +CellData = namedtuple( + "CellData", ["family", "qualifier", "timestamp", "labels", "value"] +) + + +class RowBuilder: + """ + called by state machine to build rows + State machine makes the following guarantees: + Exactly 1 `start_row` for each row. + Exactly 1 `start_cell` for each cell. + At least 1 `cell_value` for each cell. + Exactly 1 `finish_cell` for each cell. + Exactly 1 `finish_row` for each row. + `create_scan_marker_row` can be called one or more times between `finish_row` and + `start_row`. `reset` can be called at any point and can be invoked multiple times in + a row. 
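+
+    An illustrative (hypothetical) single-cell sequence under those guarantees:
+        start_row(b"key")
+        start_cell("fam", b"qual", timestamp, labels=[], size=5)
+        cell_value(b"hello")
+        finish_cell()
+        finish_row()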
+ """ + + def __init__(self): + self.reset() + + def row_in_progress(self) -> bool: + return self.current_key is not None + + def reset(self) -> None: + """called when the current in progress row should be dropped""" + self.current_key: Optional[bytes] = None + self.working_cell: Optional[CellData] = None + self.previous_cells: List[CellData] = [] + + def create_scan_marker_row(self, key: bytes) -> PartialRowData: + """creates a special row to mark server progress before any data is received""" + return PartialRowData(key) + + def start_row(self, key: bytes) -> None: + """Called to start a new row. This will be called once per row""" + self.current_key = key + + def start_cell( + self, + family: str, + qualifier: bytes, + timestamp: int, + labels: List[str], + size: int, + ) -> None: + """called to start a new cell in a row.""" + if not family: + raise InvalidChunk("missing family for a new cell") + if qualifier is None: + raise InvalidChunk("missing qualifier for a new cell") + self.working_cell = CellData(family, qualifier, timestamp, labels, bytearray()) + + def cell_value(self, value: bytes) -> None: + """called multiple times per cell to concatenate the cell value""" + assert isinstance(self.working_cell, CellData) + self.working_cell.value.extend(value) + + def finish_cell(self) -> None: + """called once per cell to signal the end of the value (unless reset)""" + assert isinstance(self.working_cell, CellData) + self.previous_cells.append(self.working_cell) + self.working_cell = None + + def finish_row(self) -> PartialRowData: + """called once per row to signal that all cells have been processed (unless reset)""" + cell_data:Dict[Any,Any] = {} + for cell in self.previous_cells: + # TODO: handle timezones? + # should probably make a new row class + # timestamp = datetime.fromtimestamp(cell.timestamp / 1e6) + family_dict = cell_data.get(cell.family, {}) + qualifier_arr = family_dict.get(cell.qualifier, []) + qualifier_arr.append(Cell(bytes(cell.value), cell.timestamp, cell.labels)) + family_dict[cell.qualifier] = qualifier_arr + cell_data[cell.family] = family_dict + new_row = PartialRowData(self.current_key) + new_row._cells = cell_data + self.reset() + return new_row From 38ceb8a3ce77cb8649c93e1a0a3eb7ac3b0584b0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 13:26:33 -0700 Subject: [PATCH 059/349] replaced deprecated classes with new ones --- google/cloud/bigtable/row_merger.py | 34 ++++++++++++----------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ad0e58864..a128812fa 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -14,7 +14,7 @@ # from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.row import Row, DirectRow, InvalidChunk, PartialRowData, Cell +from google.cloud.bigtable.row_response import RowResponse, CellResponse from google.protobuf.wrappers_pb2 import StringValue, BytesValue from collections import deque, namedtuple from datetime import datetime @@ -22,10 +22,13 @@ from typing import cast, Deque, Optional, List, Dict, Set, Any, AsyncIterable, AsyncGenerator, Awaitable +class InvalidChunk(RuntimeError): + """Exception raised to invalid chunk data from back-end.""" + class RowMerger: def __init__(self): self.state_machine: StateMachine = StateMachine() - self.cache: asyncio.Queue[PartialRowData] = asyncio.Queue() + self.cache: asyncio.Queue[RowResponse] = 
asyncio.Queue() def push(self, new_data: ReadRowsResponse): if not isinstance(new_data, ReadRowsResponse): @@ -55,7 +58,7 @@ def has_partial_frame(self) -> bool: """ return self.state_machine.is_row_in_progress() - def pop(self) -> PartialRowData: + def pop(self) -> RowResponse: """ Return a row out of the cache of waiting rows """ @@ -120,7 +123,7 @@ def reset(self): self.last_seen_row_key: Optional[bytes] = None # self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 - self.complete_row: Optional[PartialRowData] = None + self.complete_row: Optional[RowResponse] = None # self.num_cells_in_row:int = 0 self.adapter.reset() @@ -145,7 +148,7 @@ def has_complete_row(self) -> bool: and self.complete_row is not None ) - def consume_row(self) -> PartialRowData: + def consume_row(self) -> RowResponse: """ Returns the last completed row and transitions to a new row """ @@ -359,11 +362,11 @@ def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: Optional[bytes] = None self.working_cell: Optional[CellData] = None - self.previous_cells: List[CellData] = [] + self.completed_cells: List[CellResponse] = [] - def create_scan_marker_row(self, key: bytes) -> PartialRowData: + def create_scan_marker_row(self, key: bytes) -> RowResponse: """creates a special row to mark server progress before any data is received""" - return PartialRowData(key) + return RowResponse(key, []) def start_row(self, key: bytes) -> None: """Called to start a new row. This will be called once per row""" @@ -392,22 +395,13 @@ def cell_value(self, value: bytes) -> None: def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" assert isinstance(self.working_cell, CellData) - self.previous_cells.append(self.working_cell) + self.completed_cells.append(CellResponse(*self.working_cell)) self.working_cell = None - def finish_row(self) -> PartialRowData: + def finish_row(self) -> RowResponse: """called once per row to signal that all cells have been processed (unless reset)""" cell_data:Dict[Any,Any] = {} - for cell in self.previous_cells: - # TODO: handle timezones? 
- # should probably make a new row class - # timestamp = datetime.fromtimestamp(cell.timestamp / 1e6) - family_dict = cell_data.get(cell.family, {}) - qualifier_arr = family_dict.get(cell.qualifier, []) - qualifier_arr.append(Cell(bytes(cell.value), cell.timestamp, cell.labels)) - family_dict[cell.qualifier] = qualifier_arr - cell_data[cell.family] = family_dict - new_row = PartialRowData(self.current_key) + new_row = RowResponse(self.current_key, self.completed_cells) new_row._cells = cell_data self.reset() return new_row From 877ad074b27041c59b9221201dfc93b370b1d916 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 13:39:42 -0700 Subject: [PATCH 060/349] removed cell data tuple --- google/cloud/bigtable/row_merger.py | 61 +++++++++++++++++++---------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a128812fa..9e59c5c06 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -20,11 +20,24 @@ from datetime import datetime import asyncio -from typing import cast, Deque, Optional, List, Dict, Set, Any, AsyncIterable, AsyncGenerator, Awaitable +from typing import ( + cast, + Deque, + Optional, + List, + Dict, + Set, + Any, + AsyncIterable, + AsyncGenerator, + Awaitable, +) + class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" + class RowMerger: def __init__(self): self.state_machine: StateMachine = StateMachine() @@ -32,7 +45,7 @@ def __init__(self): def push(self, new_data: ReadRowsResponse): if not isinstance(new_data, ReadRowsResponse): - new_data = ReadRowsResponse(new_data) #type: ignore + new_data = ReadRowsResponse(new_data) # type: ignore last_scanned = new_data.last_scanned_row_key # if the server sends a scan heartbeat, notify the state machine. 
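        # For example, ReadRowsResponse(last_scanned_row_key=b"key") with no chunks is such a
        # heartbeat: the server has scanned past that key without emitting any rows, so a
        # retried request can safely resume after it (the key shown here is only illustrative).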
if last_scanned: @@ -64,8 +77,9 @@ def pop(self) -> RowResponse: """ return self.cache.get_nowait() + class RowMergerIterator(RowMerger): - def __init__(self, request_generator:Awaitable[AsyncIterable[ReadRowsResponse]]): + def __init__(self, request_generator: Awaitable[AsyncIterable[ReadRowsResponse]]): super().__init__() self.task = asyncio.create_task(self._consume_stream(request_generator)) @@ -81,8 +95,8 @@ async def __anext__(self): # wait for either the task to finish, or a new item to enter the cache get_from_cache = asyncio.create_task(self.cache.get()) await asyncio.wait( - [self.task, get_from_cache], - return_when=asyncio.FIRST_COMPLETED) + [self.task, get_from_cache], return_when=asyncio.FIRST_COMPLETED + ) # if a new item was put in the cache, return that if get_from_cache.done(): return get_from_cache.result() @@ -96,9 +110,9 @@ async def __anext__(self): else: raise RuntimeError("expected either new item or stream completion") - async def _consume_stream(self, request_gen:AsyncIterable[ReadRowsResponse]): + async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): """ - Coroutine to consume ReadRowsResponses from the backend, + Coroutine to consume ReadRowsResponses from the backend, run them through the state machine, and push them into the queue for later consumption """ @@ -109,6 +123,7 @@ async def _consume_stream(self, request_gen:AsyncIterable[ReadRowsResponse]): # TODO: change type raise RuntimeError("read_rows completed with partial state remaining") + class StateMachine: def __init__(self): self.completed_row_keys: Set[bytes] = set({}) @@ -269,7 +284,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("row key changed mid row") self._owner.adapter.start_cell( - **self._owner.last_cell_data, size=expected_cell_size + **self._owner.last_cell_data, + row_key=self._owner.adapter.current_key, + size=expected_cell_size, ) self._owner.adapter.cell_value(chunk.value) # transition to new state @@ -333,11 +350,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise RuntimeError("Skipping completed row") -CellData = namedtuple( - "CellData", ["family", "qualifier", "timestamp", "labels", "value"] -) - - class RowBuilder: """ called by state machine to build rows @@ -361,7 +373,7 @@ def row_in_progress(self) -> bool: def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: Optional[bytes] = None - self.working_cell: Optional[CellData] = None + self.working_cell: Optional[Tuple(CellResponse, bytearray)] = None self.completed_cells: List[CellResponse] = [] def create_scan_marker_row(self, key: bytes) -> RowResponse: @@ -374,6 +386,7 @@ def start_row(self, key: bytes) -> None: def start_cell( self, + row_key: bytes, family: str, qualifier: bytes, timestamp: int, @@ -385,23 +398,29 @@ def start_cell( raise InvalidChunk("missing family for a new cell") if qualifier is None: raise InvalidChunk("missing qualifier for a new cell") - self.working_cell = CellData(family, qualifier, timestamp, labels, bytearray()) + working_value = bytearray(size) + self.working_cell = ( + CellResponse(b"", row_key, family, qualifier, labels, timestamp), + working_value, + ) def cell_value(self, value: bytes) -> None: """called multiple times per cell to concatenate the cell value""" - assert isinstance(self.working_cell, CellData) - self.working_cell.value.extend(value) + if self.working_cell is None: + raise InvalidChunk("cell value received before start_cell") + 
self.working_cell[1].extend(value) def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" - assert isinstance(self.working_cell, CellData) - self.completed_cells.append(CellResponse(*self.working_cell)) + if self.working_cell.value is None: + raise InvalidChunk("cell value was never set") + complete_cell, complete_value = self.working_cell + complete_cell.value = bytes(complete_value) + self.completed_cells.append(complete_cell) self.working_cell = None def finish_row(self) -> RowResponse: """called once per row to signal that all cells have been processed (unless reset)""" - cell_data:Dict[Any,Any] = {} new_row = RowResponse(self.current_key, self.completed_cells) - new_row._cells = cell_data self.reset() return new_row From 37bba24a5f18a905c9ada5476732e415c03e8e7d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 13:45:14 -0700 Subject: [PATCH 061/349] removed asserts; improved annotations --- google/cloud/bigtable/row_merger.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 9e59c5c06..24b52e249 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -23,7 +23,6 @@ from typing import ( cast, Deque, - Optional, List, Dict, Set, @@ -31,6 +30,7 @@ AsyncIterable, AsyncGenerator, Awaitable, + Tuple, ) @@ -130,29 +130,27 @@ def __init__(self): self.adapter: "RowBuilder" = RowBuilder() self.reset() - def reset(self): - self.current_state: Optional[State] = AWAITING_NEW_ROW(self) + def reset(self) -> None: + self.current_state: State = AWAITING_NEW_ROW(self) self.last_cell_data: Dict[str, Any] = {} # represents either the last row emitted, or the last_scanned_key sent from backend # all future rows should have keys > last_seen_row_key - self.last_seen_row_key: Optional[bytes] = None + self.last_seen_row_key: bytes | None = None # self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 - self.complete_row: Optional[RowResponse] = None + self.complete_row: RowResponse | None = None # self.num_cells_in_row:int = 0 self.adapter.reset() - def handle_last_scanned_row(self, last_scanned_row_key: bytes): + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: raise InvalidChunk("Last scanned row key is out of order") self.last_scanned_row_key = last_scanned_row_key - assert isinstance(self.current_state, State) self.current_state = self.current_state.handle_last_scanned_row( last_scanned_row_key ) - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk): - assert isinstance(self.current_state, State) + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") self.current_state = self.current_state.handle_chunk(chunk) @@ -208,7 +206,8 @@ def handle_reset_chunk( if chunk.value: raise InvalidChunk("Reset chunk has a value") self.reset() - assert isinstance(self.current_state, AWAITING_NEW_ROW) + if not isinstance(self.current_state, AWAITING_NEW_ROW): + raise RuntimeError("Failed to reset state machine") return self.current_state @@ -372,8 +371,8 @@ def row_in_progress(self) -> bool: def reset(self) -> None: """called when the current in progress row should be dropped""" - self.current_key: Optional[bytes] = None - self.working_cell: 
Optional[Tuple(CellResponse, bytearray)] = None + self.current_key: bytes | None = None + self.working_cell: Tuple(CellResponse, bytearray) | None = None self.completed_cells: List[CellResponse] = [] def create_scan_marker_row(self, key: bytes) -> RowResponse: From f95cf96049bdc7e7e7b4e1585d22b32d7d8d0e7f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 13:57:34 -0700 Subject: [PATCH 062/349] fixed some mypy issues --- google/cloud/bigtable/row_merger.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 24b52e249..4d94346f7 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -12,23 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable.row_response import RowResponse, CellResponse -from google.protobuf.wrappers_pb2 import StringValue, BytesValue -from collections import deque, namedtuple -from datetime import datetime import asyncio from typing import ( cast, - Deque, List, Dict, Set, Any, AsyncIterable, - AsyncGenerator, Awaitable, Tuple, ) @@ -284,7 +280,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": self._owner.adapter.start_cell( **self._owner.last_cell_data, - row_key=self._owner.adapter.current_key, size=expected_cell_size, ) self._owner.adapter.cell_value(chunk.value) @@ -372,7 +367,7 @@ def row_in_progress(self) -> bool: def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: bytes | None = None - self.working_cell: Tuple(CellResponse, bytearray) | None = None + self.working_cell: Tuple[CellResponse, bytearray] | None = None self.completed_cells: List[CellResponse] = [] def create_scan_marker_row(self, key: bytes) -> RowResponse: @@ -385,7 +380,6 @@ def start_row(self, key: bytes) -> None: def start_cell( self, - row_key: bytes, family: str, qualifier: bytes, timestamp: int, @@ -397,9 +391,11 @@ def start_cell( raise InvalidChunk("missing family for a new cell") if qualifier is None: raise InvalidChunk("missing qualifier for a new cell") + if self.current_key is None: + raise InvalidChunk("no row in progress") working_value = bytearray(size) self.working_cell = ( - CellResponse(b"", row_key, family, qualifier, labels, timestamp), + CellResponse(b"", self.current_key, family, qualifier, labels, timestamp), working_value, ) @@ -411,15 +407,19 @@ def cell_value(self, value: bytes) -> None: def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" - if self.working_cell.value is None: - raise InvalidChunk("cell value was never set") + if self.working_cell is None: + raise InvalidChunk("cell value received before start_cell") complete_cell, complete_value = self.working_cell + if not complete_value: + raise InvalidChunk("cell value was never set") complete_cell.value = bytes(complete_value) self.completed_cells.append(complete_cell) self.working_cell = None def finish_row(self) -> RowResponse: """called once per row to signal that all cells have been processed (unless reset)""" + if self.current_key is None: + raise InvalidChunk("no row in progress") new_row = RowResponse(self.current_key, self.completed_cells) self.reset() return new_row From c11ccc3374746aa52e5c6dba668bfc23ed99d63b Mon Sep 17 
00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 14:17:30 -0700 Subject: [PATCH 063/349] simplified row merger async iterator --- google/cloud/bigtable/row_merger.py | 73 ++++++++++++----------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 4d94346f7..a25acb08a 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -25,7 +25,7 @@ Set, Any, AsyncIterable, - Awaitable, + AsyncGenerator, Tuple, ) @@ -73,51 +73,36 @@ def pop(self) -> RowResponse: """ return self.cache.get_nowait() - -class RowMergerIterator(RowMerger): - def __init__(self, request_generator: Awaitable[AsyncIterable[ReadRowsResponse]]): - super().__init__() - self.task = asyncio.create_task(self._consume_stream(request_generator)) - - def __aiter__(self): - # mark self as async iterator - return self - - async def __anext__(self): - # if there are waiting items, return one - if not self.cache.empty(): - return self.cache.get_nowait() - # no waiting tasks - # wait for either the task to finish, or a new item to enter the cache - get_from_cache = asyncio.create_task(self.cache.get()) - await asyncio.wait( - [self.task, get_from_cache], return_when=asyncio.FIRST_COMPLETED - ) - # if a new item was put in the cache, return that - if get_from_cache.done(): - return get_from_cache.result() - # if the task was complete with an exception, raise the exception - elif self.task.done(): - if self.task.exception(): - raise cast(Exception, self.task.exception()) - else: - # task completed successfully - raise StopAsyncIteration - else: - raise RuntimeError("expected either new item or stream completion") - - async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): + async def merge_row_stream( + self, request_generator: AsyncIterable[ReadRowsResponse] + ) -> AsyncGenerator[RowResponse, None]: """ - Coroutine to consume ReadRowsResponses from the backend, - run them through the state machine, and push them into the queue for later - consumption + Consume chunks from a ReadRowsResponse stream into a set of Rows """ - async for request in request_gen: - self.push(request) - if self.has_partial_frame(): - # read rows is complete, but there's still data in the merger - # TODO: change type - raise RuntimeError("read_rows completed with partial state remaining") + # read from stream and push into state machine + async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): + async for request in request_gen: + self.push(request) + if self.has_partial_frame(): + # read rows is complete, but there's still data in the merger + raise RuntimeError("read_rows completed with partial state remaining") + + stream_task = asyncio.create_task(_consume_stream(self, request_generator)) + # read from state machine and push into cache + while not stream_task.done() or not self.cache.empty(): + if not self.cache.empty(): + yield self.cache.get_nowait() + else: + # wait for either the stream to finish, or a new item to enter the cache + get_from_cache = asyncio.create_task(self.cache.get()) + await asyncio.wait( + [stream_task, get_from_cache], return_when=asyncio.FIRST_COMPLETED + ) + if get_from_cache.done(): + yield get_from_cache.result() + # stream and cache are complete. 
if there's an exception, raise it + if stream_task.exception(): + raise cast(Exception, stream_task.exception()) class StateMachine: From 187207612a90160980fdc497546c2b9aa7f9cd9c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 14:26:22 -0700 Subject: [PATCH 064/349] pulled in read rows acceptance tests --- tests/unit/read-rows-acceptance-test.json | 1665 +++++++++++++++++++++ tests/unit/test_read_rows.py | 244 +++ 2 files changed, 1909 insertions(+) create mode 100644 tests/unit/read-rows-acceptance-test.json create mode 100644 tests/unit/test_read_rows.py diff --git a/tests/unit/read-rows-acceptance-test.json b/tests/unit/read-rows-acceptance-test.json new file mode 100644 index 000000000..011ace2b9 --- /dev/null +++ b/tests/unit/read-rows-acceptance-test.json @@ -0,0 +1,1665 @@ +{ + "readRowsTests": [ + { + "description": "invalid - no commit", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - no cell key before commit", + "chunks": [ + { + "commitRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - no cell key before value", + "chunks": [ + { + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - new col family must specify qualifier", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "familyName": "B", + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "bare commit implies ts=0", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C" + } + ] + }, + { + "description": "simple row with timestamp", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "missing timestamp, implied ts=0", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "value": "value-VAL" + } + ] + }, + { + "description": "empty cell value", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C" + } + ] + }, + { + "description": "two unsplit cells", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", 
+ "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "two qualifiers", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "qualifier": "RA==", + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "D", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "two families", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "familyName": "B", + "qualifier": "RQ==", + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "B", + "qualifier": "E", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "with labels", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "labels": [ + "L_1" + ], + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "timestampMicros": "98", + "labels": [ + "L_2" + ], + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1", + "label": "L_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "98", + "value": "value-VAL_2", + "label": "L_2" + } + ] + }, + { + "description": "split cell, bare commit", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dg==", + "valueSize": 9, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUw=", + "commitRow": false + }, + { + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C" + } + ] + }, + { + "description": "split cell", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dg==", + "valueSize": 9, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUw=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "split four ways", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "labels": [ + "L" + ], + "value": "dg==", + "valueSize": 9, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 9, + "commitRow": false + }, + { + "value": "bA==", + "valueSize": 9, + "commitRow": false + }, + { + "value": "dWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL", + "label": 
"L" + } + ] + }, + { + "description": "two split cells", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMQ==", + "commitRow": false + }, + { + "timestampMicros": "98", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMg==", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "multi-qualifier splits", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMQ==", + "commitRow": false + }, + { + "qualifier": "RA==", + "timestampMicros": "98", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMg==", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "D", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "multi-qualifier multi-split", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "bHVlLVZBTF8x", + "commitRow": false + }, + { + "qualifier": "RA==", + "timestampMicros": "98", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "bHVlLVZBTF8y", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "D", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "multi-family split", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMQ==", + "commitRow": false + }, + { + "familyName": "B", + "qualifier": "RQ==", + "timestampMicros": "98", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMg==", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK", + "familyName": "B", + "qualifier": "E", + "timestampMicros": "98", + "value": "value-VAL_2" + } + ] + }, + { + "description": "invalid - no commit between rows", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - no commit after first row", + "chunks": [ + { + 
"rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - last row missing commit", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "error": true + } + ] + }, + { + "description": "invalid - duplicate row key", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + }, + { + "rowKey": "UktfMQ==", + "familyName": "B", + "qualifier": "RA==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "error": true + } + ] + }, + { + "description": "invalid - new row missing row key", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + }, + { + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "error": true + } + ] + }, + { + "description": "two rows", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "rowKey": "RK_2", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "two rows implicit timestamp", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "value": "dmFsdWUtVkFM", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "value": "value-VAL" + }, + { + "rowKey": "RK_2", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "two rows empty value", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C" + }, + { + "rowKey": "RK_2", + 
"familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "two rows, one with multiple cells", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "B", + "qualifier": "RA==", + "timestampMicros": "97", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "98", + "value": "value-VAL_2" + }, + { + "rowKey": "RK_2", + "familyName": "B", + "qualifier": "D", + "timestampMicros": "97", + "value": "value-VAL_3" + } + ] + }, + { + "description": "two rows, multiple cells", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "qualifier": "RA==", + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "B", + "qualifier": "RQ==", + "timestampMicros": "97", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": false + }, + { + "qualifier": "Rg==", + "timestampMicros": "96", + "value": "dmFsdWUtVkFMXzQ=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "D", + "timestampMicros": "98", + "value": "value-VAL_2" + }, + { + "rowKey": "RK_2", + "familyName": "B", + "qualifier": "E", + "timestampMicros": "97", + "value": "value-VAL_3" + }, + { + "rowKey": "RK_2", + "familyName": "B", + "qualifier": "F", + "timestampMicros": "96", + "value": "value-VAL_4" + } + ] + }, + { + "description": "two rows, multiple cells, multiple families", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "familyName": "B", + "qualifier": "RQ==", + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "M", + "qualifier": "Tw==", + "timestampMicros": "97", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": false + }, + { + "familyName": "N", + "qualifier": "UA==", + "timestampMicros": "96", + "value": "dmFsdWUtVkFMXzQ=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1" + }, + { + "rowKey": "RK_1", + "familyName": "B", + "qualifier": "E", + "timestampMicros": "98", + "value": "value-VAL_2" + }, + { + "rowKey": "RK_2", + "familyName": "M", + "qualifier": "O", + "timestampMicros": "97", + "value": "value-VAL_3" + }, + { + "rowKey": "RK_2", + "familyName": "N", + "qualifier": "P", + "timestampMicros": "96", + "value": "value-VAL_4" + } + ] + }, + { + "description": "two rows, four cells, 2 labels", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "99", + "labels": [ + "L_1" + ], + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "timestampMicros": "98", + "value": 
"dmFsdWUtVkFMXzI=", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "B", + "qualifier": "RA==", + "timestampMicros": "97", + "labels": [ + "L_3" + ], + "value": "dmFsdWUtVkFMXzM=", + "commitRow": false + }, + { + "timestampMicros": "96", + "value": "dmFsdWUtVkFMXzQ=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "99", + "value": "value-VAL_1", + "label": "L_1" + }, + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "98", + "value": "value-VAL_2" + }, + { + "rowKey": "RK_2", + "familyName": "B", + "qualifier": "D", + "timestampMicros": "97", + "value": "value-VAL_3", + "label": "L_3" + }, + { + "rowKey": "RK_2", + "familyName": "B", + "qualifier": "D", + "timestampMicros": "96", + "value": "value-VAL_4" + } + ] + }, + { + "description": "two rows with splits, same timestamp", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMQ==", + "commitRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dg==", + "valueSize": 11, + "commitRow": false + }, + { + "value": "YWx1ZS1WQUxfMg==", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_1" + }, + { + "rowKey": "RK_2", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + } + ] + }, + { + "description": "invalid - bare reset", + "chunks": [ + { + "resetRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - bad reset, no commit", + "chunks": [ + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - missing key after reset", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "resetRow": true + }, + { + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "no data after reset", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "resetRow": true + } + ] + }, + { + "description": "simple reset", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + } + ] + }, + { + "description": "reset to new val", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + 
"timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + } + ] + }, + { + "description": "reset to new qual", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "RA==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "D", + "timestampMicros": "100", + "value": "value-VAL_1" + } + ] + }, + { + "description": "reset with splits", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "timestampMicros": "98", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + } + ] + }, + { + "description": "reset two cells", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": false + }, + { + "timestampMicros": "97", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "97", + "value": "value-VAL_3" + } + ] + }, + { + "description": "two resets", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_3" + } + ] + }, + { + "description": "reset then two cells", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "Uks=", + "familyName": "B", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": false + }, + { + "qualifier": "RA==", + "timestampMicros": "97", + "value": "dmFsdWUtVkFMXzM=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "B", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + }, + { + "rowKey": "RK", + "familyName": "B", + "qualifier": "D", + 
"timestampMicros": "97", + "value": "value-VAL_3" + } + ] + }, + { + "description": "reset to new row", + "chunks": [ + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "UktfMg==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzI=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_2", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_2" + } + ] + }, + { + "description": "reset in between chunks", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "labels": [ + "L" + ], + "value": "dg==", + "valueSize": 10, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 10, + "commitRow": false + }, + { + "resetRow": true + }, + { + "rowKey": "UktfMQ==", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFMXzE=", + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK_1", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL_1" + } + ] + }, + { + "description": "invalid - reset with chunk", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "labels": [ + "L" + ], + "value": "dg==", + "valueSize": 10, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 10, + "resetRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "invalid - commit with chunk", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "labels": [ + "L" + ], + "value": "dg==", + "valueSize": 10, + "commitRow": false + }, + { + "value": "YQ==", + "valueSize": 10, + "commitRow": true + } + ], + "results": [ + { + "error": true + } + ] + }, + { + "description": "empty cell chunk", + "chunks": [ + { + "rowKey": "Uks=", + "familyName": "A", + "qualifier": "Qw==", + "timestampMicros": "100", + "value": "dmFsdWUtVkFM", + "commitRow": false + }, + { + "commitRow": false + }, + { + "commitRow": true + } + ], + "results": [ + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C", + "timestampMicros": "100", + "value": "value-VAL" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C" + }, + { + "rowKey": "RK", + "familyName": "A", + "qualifier": "C" + } + ] + } + ] +} diff --git a/tests/unit/test_read_rows.py b/tests/unit/test_read_rows.py new file mode 100644 index 000000000..9ff83f7ed --- /dev/null +++ b/tests/unit/test_read_rows.py @@ -0,0 +1,244 @@ +import os +from itertools import zip_longest +from typing import List + +import proto +import pytest + +from google.cloud.bigtable_v2 import ReadRowsResponse + +from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk +from google.cloud.bigtable.row_response import RowResponse + +# TODO: autogenerate protos from +# https://github.com/googleapis/conformance-tests/blob/main/bigtable/v2/proto/google/cloud/conformance/bigtable/v2/tests.proto +class ReadRowsTest(proto.Message): + class Result(proto.Message): + row_key = proto.Field(proto.STRING, number=1) + family_name = proto.Field(proto.STRING, number=2) + qualifier = proto.Field(proto.STRING, number=3) + timestamp_micros = proto.Field(proto.INT64, number=4) + value = proto.Field(proto.STRING, number=5) + label = proto.Field(proto.STRING, number=6) + 
error = proto.Field(proto.BOOL, number=7) + + description = proto.Field(proto.STRING, number=1) + chunks = proto.RepeatedField( + proto.MESSAGE, number=2, message=ReadRowsResponse.CellChunk + ) + results = proto.RepeatedField(proto.MESSAGE, number=3, message=Result) + + +class TestFile(proto.Message): + __test__ = False + read_rows_tests = proto.RepeatedField(proto.MESSAGE, number=1, message=ReadRowsTest) + + +def parse_readrows_acceptance_tests(): + dirname = os.path.dirname(__file__) + filename = os.path.join(dirname, "./read-rows-acceptance-test.json") + + with open(filename) as json_file: + test_json = TestFile.from_json(json_file.read()) + return test_json.read_rows_tests + + +def extract_results_from_row(row: RowResponse): + results = [] + for family, col, cells in row.items(): + for cell in cells: + results.append( + ReadRowsTest.Result( + row_key=row.row_key, + family_name=family, + qualifier=col, + timestamp_micros=cell.timestamp_ns // 1000, + value=cell.value, + label=(cell.labels[0] if cell.labels else ""), + ) + ) + return results + + +@pytest.mark.parametrize( + "test_case", parse_readrows_acceptance_tests(), ids=lambda t: t.description +) +def test_scenario(test_case: ReadRowsTest): + results = [] + try: + merger = RowMerger() + for chunk in test_case.chunks: + req = ReadRowsResponse.pb(ReadRowsResponse(chunks=[chunk])) + merger.push(req) + if merger.has_full_frame(): + row = merger.pop() + results.extend(extract_results_from_row(row)) + if merger.has_partial_frame(): + raise InvalidChunk("merger has partial frame after reading") + except InvalidChunk as e: + results.append(ReadRowsTest.Result(error=True)) + for expected, actual in zip_longest(test_case.results, results): + assert actual == expected + # def fake_read(*args, **kwargs): + # return iter([ReadRowsResponse(chunks=test_case.chunks)]) + # actual_results: List[ReadRowsTest.Result] = [] + # try: + # for row in PartialRowsData(fake_read, request=None): + # actual_results.extend(extract_results_from_row(row)) + # except (InvalidChunk, ValueError): + # actual_results.append(ReadRowsTest.Result(error=True)) + # breakpoint() + + +def test_out_of_order_rows(): + merger = RowMerger() + merger.state_machine.last_seen_row_key = b"a" + req = ReadRowsResponse(last_scanned_row_key=b"a") + with pytest.raises(InvalidChunk): + merger.push(req) + + +def test_bare_reset(): + first_chunk = ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk( + row_key=b"a", family_name="f", qualifier=b"q", value=b"v" + ) + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, row_key=b"a") + ), + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, family_name="f") + ), + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, qualifier=b"q") + ), + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, timestamp_micros=1000) + ), + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, labels=["a"]) + ), + ) + with pytest.raises(InvalidChunk): + _process_chunks( + first_chunk, + ReadRowsResponse.CellChunk( + ReadRowsResponse.CellChunk(reset_row=True, value=b"v") + ), + ) + + +def 
test_missing_family(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + qualifier=b"q", + timestamp_micros=1000, + value=b"v", + commit_row=True, + ) + ) + + +def test_mid_cell_row_key_change(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + family_name="f", + qualifier=b"q", + timestamp_micros=1000, + value_size=2, + value=b"v", + ), + ReadRowsResponse.CellChunk(row_key=b"b", value=b"v", commit_row=True), + ) + + +def test_mid_cell_family_change(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + family_name="f", + qualifier=b"q", + timestamp_micros=1000, + value_size=2, + value=b"v", + ), + ReadRowsResponse.CellChunk(family_name="f2", value=b"v", commit_row=True), + ) + + +def test_mid_cell_qualifier_change(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + family_name="f", + qualifier=b"q", + timestamp_micros=1000, + value_size=2, + value=b"v", + ), + ReadRowsResponse.CellChunk(qualifier=b"q2", value=b"v", commit_row=True), + ) + + +def test_mid_cell_timestamp_change(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + family_name="f", + qualifier=b"q", + timestamp_micros=1000, + value_size=2, + value=b"v", + ), + ReadRowsResponse.CellChunk( + timestamp_micros=2000, value=b"v", commit_row=True + ), + ) + + +def test_mid_cell_labels_change(): + with pytest.raises(InvalidChunk): + _process_chunks( + ReadRowsResponse.CellChunk( + row_key=b"a", + family_name="f", + qualifier=b"q", + timestamp_micros=1000, + value_size=2, + value=b"v", + ), + ReadRowsResponse.CellChunk(labels=["b"], value=b"v", commit_row=True), + ) + + +def _process_chunks(*chunks): + req = ReadRowsResponse.pb(ReadRowsResponse(chunks=chunks)) + merger = RowMerger() + merger.push(req) From 755456f0042e07d11ef2165a78dc0973d6d36c07 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 21 Mar 2023 14:37:19 -0700 Subject: [PATCH 065/349] simple implementation of read_rows_stream --- google/cloud/bigtable/client.py | 12 +++++++++++- tests/unit/test_read_rows.py | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index df4bf308f..d96ba6f7d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -19,6 +19,7 @@ from google.cloud.client import ClientWithProject +from google.cloud.bigtable.row_merger import RowMerger import google.auth.credentials @@ -154,7 +155,16 @@ async def read_rows_stream( from any retries that failed - IdleTimeout: if generator was abandoned """ - raise NotImplementedError + request = query.to_dict() if isinstance(query, ReadRowsQuery) else query + request["table_name"] = self._gapic_client.table_name(self.table_id) + gapic_stream_handler = await self._gapic_client.read_rows( + request=request, + app_profile_id=self.app_profile_id, + timeout=operation_timeout, + ) + merger = RowMerger() + async for row in merger.merge_row_stream(gapic_stream_handler): + yield row async def read_rows( self, diff --git a/tests/unit/test_read_rows.py b/tests/unit/test_read_rows.py index 9ff83f7ed..c6d1566fe 100644 --- a/tests/unit/test_read_rows.py +++ b/tests/unit/test_read_rows.py @@ -1,6 +1,5 @@ import os from itertools import zip_longest -from typing import List import proto import pytest @@ -10,6 +9,7 @@ from google.cloud.bigtable.row_merger 
import RowMerger, InvalidChunk from google.cloud.bigtable.row_response import RowResponse + # TODO: autogenerate protos from # https://github.com/googleapis/conformance-tests/blob/main/bigtable/v2/proto/google/cloud/conformance/bigtable/v2/tests.proto class ReadRowsTest(proto.Message): @@ -75,7 +75,7 @@ def test_scenario(test_case: ReadRowsTest): results.extend(extract_results_from_row(row)) if merger.has_partial_frame(): raise InvalidChunk("merger has partial frame after reading") - except InvalidChunk as e: + except InvalidChunk: results.append(ReadRowsTest.Result(error=True)) for expected, actual in zip_longest(test_case.results, results): assert actual == expected From 08cab3877a8f4b7656e7960007f55ab15656b813 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 14:32:46 -0700 Subject: [PATCH 066/349] added assertions for exception messages --- google/cloud/bigtable/read_rows_query.py | 10 ++--- tests/unit/test_read_rows_query.py | 50 +++++++++++++++++++----- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 3f4ef1ebb..fb7a4174b 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -91,9 +91,7 @@ def set_filter( or isinstance(row_filter, RowFilter) or row_filter is None ): - raise ValueError( - "row_filter must be a RowFilter or corresponding dict representation" - ) + raise ValueError("row_filter must be a RowFilter or dict") self._filter = row_filter return self @@ -144,13 +142,11 @@ def add_range( if start_is_inclusive is None: start_is_inclusive = True elif start_key is None: - raise ValueError( - "start_is_inclusive must not be included if start_key is None" - ) + raise ValueError("start_is_inclusive must not be set without start_key") if end_is_inclusive is None: end_is_inclusive = False elif end_key is None: - raise ValueError("end_is_inclusive must not be included if end_key is None") + raise ValueError("end_is_inclusive must not be set without end_key") # ensure that start_key and end_key are bytes if isinstance(start_key, str): start_key = start_key.encode() diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index eb924edaa..569e97f17 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -50,8 +50,9 @@ def test_ctor_explicit(self): self.assertEqual(query.limit, 10) def test_ctor_invalid_limit(self): - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as exc: self._make_one(limit=-1) + self.assertEqual(exc.exception.args, ("limit must be >= 0",)) def test_set_filter(self): from google.cloud.bigtable.row_filters import RowFilterChain @@ -70,8 +71,11 @@ def test_set_filter(self): self.assertEqual(result, query) query.filter = RowFilterChain() self.assertEqual(query.filter, RowFilterChain()) - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as exc: query.filter = 1 + self.assertEqual( + exc.exception.args, ("row_filter must be a RowFilter or dict",) + ) def test_set_filter_dict(self): from google.cloud.bigtable.row_filters import RowSampleFilter @@ -103,10 +107,12 @@ def test_set_limit(self): result = query.set_limit(0) self.assertEqual(query.limit, 0) self.assertEqual(result, query) - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as exc: query.set_limit(-1) - with self.assertRaises(ValueError): + self.assertEqual(exc.exception.args, ("limit must be >= 0",)) + with 
self.assertRaises(ValueError) as exc: query.limit = -100 + self.assertEqual(exc.exception.args, ("limit must be >= 0",)) def test_add_rows_str(self): query = self._make_one() @@ -159,10 +165,12 @@ def test_add_rows_batch(self): def test_add_rows_invalid(self): query = self._make_one() - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as exc: query.add_rows(1) - with self.assertRaises(ValueError): + self.assertEqual(exc.exception.args, ("row_keys must be strings or bytes",)) + with self.assertRaises(ValueError) as exc: query.add_rows(["s", 0]) + self.assertEqual(exc.exception.args, ("row_keys must be strings or bytes",)) def test_duplicate_rows(self): # should only hold one of each input key @@ -212,14 +220,36 @@ def test_add_range(self): self.assertEqual(query.row_ranges[4][0], None) self.assertEqual(query.row_ranges[4][1], None) # test with inclusive flags only - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as exc: query.add_range(start_is_inclusive=True, end_is_inclusive=True) - with self.assertRaises(ValueError): + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must not be set without start_key",), + ) + with self.assertRaises(ValueError) as exc: query.add_range(start_is_inclusive=False, end_is_inclusive=False) - with self.assertRaises(ValueError): + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must not be set without start_key",), + ) + with self.assertRaises(ValueError) as exc: query.add_range(start_is_inclusive=False) - with self.assertRaises(ValueError): + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must not be set without start_key",), + ) + with self.assertRaises(ValueError) as exc: query.add_range(end_is_inclusive=True) + self.assertEqual( + exc.exception.args, ("end_is_inclusive must not be set without end_key",) + ) + # test with invalid keys + with self.assertRaises(ValueError) as exc: + query.add_range(1, "2") + self.assertEqual(exc.exception.args, ("start_key must be a string or bytes",)) + with self.assertRaises(ValueError) as exc: + query.add_range("1", 2) + self.assertEqual(exc.exception.args, ("end_key must be a string or bytes",)) def test_to_dict_rows_default(self): # dictionary should be in rowset proto format From 7bf6c7be8ced1b0dad2617ba778a4be9312edcb3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 14:32:58 -0700 Subject: [PATCH 067/349] added to_dict stub --- google/cloud/bigtable/row_filters.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/row_filters.py index 53192acc8..696e76c8c 100644 --- a/google/cloud/bigtable/row_filters.py +++ b/google/cloud/bigtable/row_filters.py @@ -35,6 +35,14 @@ class RowFilter(object): This class is a do-nothing base class for all row filters. """ + def to_dict(self): + """Convert the filter to a dictionary. + + :rtype: dict + :returns: The dictionary representation of this filter. + """ + raise NotImplementedError + class _BoolFilter(RowFilter): """Row filter that uses a boolean flag. 
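The to_dict() stub added in the patch above defines the contract that lets ReadRowsQuery.set_filter accept either a RowFilter instance or its dict form. As a point of reference only — this sketch is not part of the patch series, and the standalone class name and table_name value are illustrative — a concrete filter could satisfy the contract by emitting the matching v2 RowFilter proto field, and the resulting dict can be passed straight to the GAPIC request type, just as test_set_filter_dict does with ReadRowsRequest(**output):

    # Sketch only: the patch above leaves to_dict() raising NotImplementedError;
    # concrete subclasses are expected to override it with their proto dict form.
    from google.cloud.bigtable_v2.types import ReadRowsRequest


    class SampleFilterSketch:
        """Hypothetical filter matching cells from each row with probability ``sample``."""

        def __init__(self, sample: float):
            self.sample = sample

        def to_dict(self) -> dict:
            # mirrors the ``row_sample_filter`` field of the v2 RowFilter proto
            return {"row_sample_filter": self.sample}


    # the dict form drops directly into the request; no RowFilter object is needed
    request = ReadRowsRequest(
        table_name="projects/p/instances/i/tables/t",
        filter=SampleFilterSketch(0.5).to_dict(),
    )
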
From 587945c34c5b0551f4b9ce7a9c93569c856194a5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 14:39:34 -0700 Subject: [PATCH 068/349] removed python7 incompatible statement --- google/cloud/bigtable/row_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index 2262e5a5b..f8b30f833 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -217,7 +217,7 @@ def __getitem__( pass @overload - def __getitem__(self, index: int, /) -> CellResponse: + def __getitem__(self, index: int) -> CellResponse: # overload signature for type checking pass From 34dec95e02e7a5b6e92ecfd2463842ed4941ce96 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 14:53:29 -0700 Subject: [PATCH 069/349] removed unneeded functions --- google/cloud/bigtable/row_merger.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a25acb08a..783cb2b6a 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -54,25 +54,6 @@ def push(self, new_data: ReadRowsResponse): if self.state_machine.has_complete_row(): self.cache.put_nowait(self.state_machine.consume_row()) - def has_full_frame(self) -> bool: - """ - Indicates whether there is a row ready to consume - """ - return not self.cache.empty() - - def has_partial_frame(self) -> bool: - """ - Returns true if the merger still has ongoing state - By the end of the process, there should be no partial state - """ - return self.state_machine.is_row_in_progress() - - def pop(self) -> RowResponse: - """ - Return a row out of the cache of waiting rows - """ - return self.cache.get_nowait() - async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] ) -> AsyncGenerator[RowResponse, None]: @@ -83,7 +64,7 @@ async def merge_row_stream( async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): async for request in request_gen: self.push(request) - if self.has_partial_frame(): + if self.state_machine.is_row_in_progress(): # read rows is complete, but there's still data in the merger raise RuntimeError("read_rows completed with partial state remaining") From 0e2b1e1602bcf8b41d739673f883e53802edd7e4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 14:58:22 -0700 Subject: [PATCH 070/349] added queue size --- google/cloud/bigtable/row_merger.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 783cb2b6a..979468b49 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -35,9 +35,11 @@ class InvalidChunk(RuntimeError): class RowMerger: - def __init__(self): + def __init__(self, max_queue_size: int|None = None): + if max_queue_size is None: + max_queue_size = -1 self.state_machine: StateMachine = StateMachine() - self.cache: asyncio.Queue[RowResponse] = asyncio.Queue() + self.cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_queue_size) def push(self, new_data: ReadRowsResponse): if not isinstance(new_data, ReadRowsResponse): From 8ff5a19bfcc3c0f6b1129d7cf3167840279d5608 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 15:47:04 -0700 Subject: [PATCH 071/349] refactoring rowmerger --- google/cloud/bigtable/row_merger.py | 54 
+++++++++++++++-------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 979468b49..14bac316b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -41,40 +41,41 @@ def __init__(self, max_queue_size: int|None = None): self.state_machine: StateMachine = StateMachine() self.cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_queue_size) - def push(self, new_data: ReadRowsResponse): - if not isinstance(new_data, ReadRowsResponse): - new_data = ReadRowsResponse(new_data) # type: ignore - last_scanned = new_data.last_scanned_row_key - # if the server sends a scan heartbeat, notify the state machine. - if last_scanned: - self.state_machine.handle_last_scanned_row(last_scanned) - if self.state_machine.has_complete_row(): - self.cache.put_nowait(self.state_machine.consume_row()) - # process new chunks through the state machine. - for chunk in new_data.chunks: - self.state_machine.handle_chunk(chunk) - if self.state_machine.has_complete_row(): - self.cache.put_nowait(self.state_machine.consume_row()) - async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] ) -> AsyncGenerator[RowResponse, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows """ - # read from stream and push into state machine - async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): - async for request in request_gen: - self.push(request) - if self.state_machine.is_row_in_progress(): - # read rows is complete, but there's still data in the merger - raise RuntimeError("read_rows completed with partial state remaining") - - stream_task = asyncio.create_task(_consume_stream(self, request_generator)) - # read from state machine and push into cache + async for row_response in request_generator: + # ensure that the response is a ReadRowsResponse + if not isinstance(row_response, ReadRowsResponse): + row_response = ReadRowsResponse(row_response) + last_scanned = row_response.last_scanned_row_key + # if the server sends a scan heartbeat, notify the state machine. + if last_scanned: + self.state_machine.handle_last_scanned_row(last_scanned) + if self.state_machine.has_complete_row(): + yield self.state_machine.consume_row() + # process new chunks through the state machine. 
+ for chunk in row_response.chunks: + self.state_machine.handle_chunk(chunk) + if self.state_machine.has_complete_row(): + yield self.state_machine.consume_row() + if self.state_machine.is_row_in_progress(): + # read rows is complete, but there's still data in the merger + raise RuntimeError("read_rows completed with partial state remaining") + + async def _generator_to_cache(self, input_generator: AsyncIterable[Any]) -> None: + async for item in input_generator: + await self.cache.put(item) + + async def merge_row_stream_with_cache(self, request_generator: AsyncIterable[ReadRowsResponse]) -> None: + stream_task = asyncio.create_task(self._generator_to_cache(self.merge_row_stream(request_generator))) + # read from state machine and push into cache while not stream_task.done() or not self.cache.empty(): if not self.cache.empty(): - yield self.cache.get_nowait() + yield await self.cache.get() else: # wait for either the stream to finish, or a new item to enter the cache get_from_cache = asyncio.create_task(self.cache.get()) @@ -88,6 +89,7 @@ async def _consume_stream(self, request_gen: AsyncIterable[ReadRowsResponse]): raise cast(Exception, stream_task.exception()) + class StateMachine: def __init__(self): self.completed_row_keys: Set[bytes] = set({}) From 35ba9802138b8d5954669923e9c6939505ef73b6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 15:51:56 -0700 Subject: [PATCH 072/349] moved cache into function --- google/cloud/bigtable/row_merger.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 14bac316b..f2a626221 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -35,11 +35,8 @@ class InvalidChunk(RuntimeError): class RowMerger: - def __init__(self, max_queue_size: int|None = None): - if max_queue_size is None: - max_queue_size = -1 + def __init__(self): self.state_machine: StateMachine = StateMachine() - self.cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_queue_size) async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] @@ -66,19 +63,23 @@ async def merge_row_stream( # read rows is complete, but there's still data in the merger raise RuntimeError("read_rows completed with partial state remaining") - async def _generator_to_cache(self, input_generator: AsyncIterable[Any]) -> None: + async def _generator_to_cache(self, cache:asyncio.Queue[Any], input_generator: AsyncIterable[Any]) -> None: async for item in input_generator: - await self.cache.put(item) + await cache.put(item) + + async def merge_row_stream_with_cache(self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size:int|None=None) -> None: + if max_cache_size is None: + max_cache_size = -1 + cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_cache_size) - async def merge_row_stream_with_cache(self, request_generator: AsyncIterable[ReadRowsResponse]) -> None: - stream_task = asyncio.create_task(self._generator_to_cache(self.merge_row_stream(request_generator))) + stream_task = asyncio.create_task(self._generator_to_cache(cache, self.merge_row_stream(request_generator))) # read from state machine and push into cache - while not stream_task.done() or not self.cache.empty(): - if not self.cache.empty(): - yield await self.cache.get() + while not stream_task.done() or not cache.empty(): + if not cache.empty(): + yield await cache.get() else: # wait for either the stream to finish, or a new item to 
enter the cache - get_from_cache = asyncio.create_task(self.cache.get()) + get_from_cache = asyncio.create_task(cache.get()) await asyncio.wait( [stream_task, get_from_cache], return_when=asyncio.FIRST_COMPLETED ) From 238fc34f9c6cf46917a5d63c02cd641e638f54d3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 16:14:29 -0700 Subject: [PATCH 073/349] made State an abc --- google/cloud/bigtable/row_merger.py | 34 ++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index f2a626221..aa380486b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -18,6 +18,8 @@ from google.cloud.bigtable.row_response import RowResponse, CellResponse import asyncio +from abc import ABC, abstractmethod + from typing import ( cast, List, @@ -123,6 +125,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: self.current_state = self.current_state.handle_chunk(chunk) def has_complete_row(self) -> bool: + """ + Returns True if the state machine has a complete row ready to consume + """ return ( isinstance(self.current_state, AWAITING_ROW_CONSUME) and self.complete_row is not None @@ -140,6 +145,11 @@ def consume_row(self) -> RowResponse: return row def is_row_in_progress(self) -> bool: + """ + Returns true if the state machine is in the middle of processing a row + + At the end of the read_rows stream, is_row_in_progress() should return false + """ return not isinstance(self.current_state, AWAITING_NEW_ROW) def handle_commit_row(self) -> "State": @@ -178,16 +188,17 @@ def handle_reset_chunk( return self.current_state -class State: - def __init__(self, owner: "StateMachine"): - self._owner = owner - - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - raise NotImplementedError +class State(ABC): + def __init__(self, owner: StateMachine): + self.owner = owner + @abstractmethod def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": - raise NotImplementedError + pass + @abstractmethod + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + pass class AWAITING_NEW_ROW(State): """ @@ -267,6 +278,8 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # wait for more cells for this row return AWAITING_NEW_CELL(self._owner) + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + raise InvalidChunk("Last scanned row key received in invalid state") class AWAITING_CELL_VALUE(State): """ @@ -304,6 +317,8 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # wait for more cells for this row return AWAITING_NEW_CELL(self._owner) + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + raise InvalidChunk("Last scanned row key received in invalid state") class AWAITING_ROW_CONSUME(State): """ @@ -312,7 +327,10 @@ class AWAITING_ROW_CONSUME(State): """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": - raise RuntimeError("Skipping completed row") + raise InvalidChunk("Row is complete. Must consume row before reading more") + + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": + raise InvalidChunk("Row is complete. 
Must consume row before reading more") class RowBuilder: From 9ed4cc083f7b95bd49b1191b2b73ce829750a02b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 16:17:23 -0700 Subject: [PATCH 074/349] added comments --- google/cloud/bigtable/row_merger.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index aa380486b..ceed246a1 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -112,6 +112,9 @@ def reset(self) -> None: self.adapter.reset() def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: + """ + Called by RowMerger to notify the state machine of a scan heartbeat + """ if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: raise InvalidChunk("Last scanned row key is out of order") self.last_scanned_row_key = last_scanned_row_key @@ -120,6 +123,9 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: ) def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: + """ + Called by RowMerger to process a new chunk + """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") self.current_state = self.current_state.handle_chunk(chunk) From 9776889689ba5dede319af10cad8a0e7a997b608 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 16:30:27 -0700 Subject: [PATCH 075/349] moved reset and commit logic into state machine --- google/cloud/bigtable/row_merger.py | 49 +++++++++++++---------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ceed246a1..4e166e51a 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -128,7 +128,12 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") - self.current_state = self.current_state.handle_chunk(chunk) + if chunk.reset_row: + self._handle_reset_row(chunk) + else: + self.current_state = self.current_state.handle_chunk(chunk) + if chunk.commit_row: + self._handle_commit_row(chunk) def has_complete_row(self) -> bool: """ @@ -158,23 +163,29 @@ def is_row_in_progress(self) -> bool: """ return not isinstance(self.current_state, AWAITING_NEW_ROW) - def handle_commit_row(self) -> "State": + def _handle_commit_row(self) -> "State": """ - Called when a row is complete. 
- Wait in AWAITING_ROW_CONSUME state for the RowMerger to consume it + Complete row and move into AWAITING_ROW_CONSUME state + + Called by StateMachine when a commit_row flag is set on a chunk """ + if not isinstance(self.current_state, AWAITING_NEW_CELL): + raise InvalidChunk("commit row attempted without finishing cell") self.complete_row = self.adapter.finish_row() self.last_seen_row_key = self.complete_row.row_key return AWAITING_ROW_CONSUME(self) - def handle_reset_chunk( - self, chunk: ReadRowsResponse.CellChunk - ) -> "AWAITING_NEW_ROW": + def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): """ - When a reset chunk comes in, drop all buffers and reset to AWAITING_NEW_ROW state + Drop all buffers and reset the row in progress + + Called by StateMachine when a reset_row flag is set on a chunk """ # ensure reset chunk matches expectations - if isinstance(self.current_state, AWAITING_NEW_ROW): + if isinstance(self.current_state, AWAITING_NEW_ROW) or \ + isinstance(self.current_state, AWAITING_ROW_CONSUME + ): + raise InvalidChunk("reset chunk received when not processing row") raise InvalidChunk("Bare reset") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") @@ -191,7 +202,6 @@ def handle_reset_chunk( self.reset() if not isinstance(self.current_state, AWAITING_NEW_ROW): raise RuntimeError("Failed to reset state machine") - return self.current_state class State(ABC): @@ -240,8 +250,6 @@ class AWAITING_NEW_CELL(State): """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": - if chunk.reset_row: - return self._owner.handle_reset_chunk(chunk) chunk_size = len(chunk.value) is_split = chunk.value_size > 0 expected_cell_size = chunk.value_size if is_split else chunk_size @@ -277,12 +285,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": else: # cell is complete self._owner.adapter.finish_cell() - if chunk.commit_row: - # row is also complete - return self._owner.handle_commit_row() - else: - # wait for more cells for this row - return AWAITING_NEW_CELL(self._owner) + return AWAITING_NEW_CELL(self._owner) def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": raise InvalidChunk("Last scanned row key received in invalid state") @@ -305,9 +308,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("In progress cell had a timestamp") if chunk.labels: raise InvalidChunk("In progress cell had labels") - # check for reset row - if chunk.reset_row: - return self._owner.handle_reset_chunk(chunk) is_last = chunk.value_size == 0 self._owner.adapter.cell_value(chunk.value) # transition to new state @@ -316,12 +316,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": else: # cell is complete self._owner.adapter.finish_cell() - if chunk.commit_row: - # row is also complete - return self._owner.handle_commit_row() - else: - # wait for more cells for this row - return AWAITING_NEW_CELL(self._owner) + return AWAITING_NEW_CELL(self._owner) def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": raise InvalidChunk("Last scanned row key received in invalid state") From 1346b70d70425bdc575464b4dbb1b80abe3c6d42 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 16:40:13 -0700 Subject: [PATCH 076/349] renamed functions --- google/cloud/bigtable/row_merger.py | 67 +++++++++++++++-------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py 
index 4e166e51a..7098e0d5d 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -54,14 +54,14 @@ async def merge_row_stream( # if the server sends a scan heartbeat, notify the state machine. if last_scanned: self.state_machine.handle_last_scanned_row(last_scanned) - if self.state_machine.has_complete_row(): + if self.state_machine.row_ready(): yield self.state_machine.consume_row() # process new chunks through the state machine. for chunk in row_response.chunks: self.state_machine.handle_chunk(chunk) - if self.state_machine.has_complete_row(): + if self.state_machine.row_ready(): yield self.state_machine.consume_row() - if self.state_machine.is_row_in_progress(): + if not self.state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise RuntimeError("read_rows completed with partial state remaining") @@ -111,6 +111,36 @@ def reset(self) -> None: # self.num_cells_in_row:int = 0 self.adapter.reset() + def row_ready(self) -> bool: + """ + Returns True if the state machine has a complete row ready to consume + """ + return ( + isinstance(self.current_state, AWAITING_ROW_CONSUME) + and self.complete_row is not None + ) + + def consume_row(self) -> RowResponse: + """ + Returns the last completed row and transitions to a new row + """ + if not self.row_ready() or self.complete_row is None: + raise RuntimeError("No row to consume") + row = self.complete_row + self.reset() + self.completed_row_keys.add(row.row_key) + return row + + def is_terminal_state(self) -> bool: + """ + Returns true if the state machine is in a terminal state (AWAITING_NEW_ROW) + + At the end of the read_rows stream, if the state machine is not in a terminal + state, an exception should be raised + """ + return isinstance(self.current_state, AWAITING_NEW_ROW) + + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: """ Called by RowMerger to notify the state machine of a scan heartbeat @@ -135,34 +165,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: if chunk.commit_row: self._handle_commit_row(chunk) - def has_complete_row(self) -> bool: - """ - Returns True if the state machine has a complete row ready to consume - """ - return ( - isinstance(self.current_state, AWAITING_ROW_CONSUME) - and self.complete_row is not None - ) - - def consume_row(self) -> RowResponse: - """ - Returns the last completed row and transitions to a new row - """ - if not self.has_complete_row() or self.complete_row is None: - raise RuntimeError("No row to consume") - row = self.complete_row - self.reset() - self.completed_row_keys.add(row.row_key) - return row - - def is_row_in_progress(self) -> bool: - """ - Returns true if the state machine is in the middle of processing a row - - At the end of the read_rows stream, is_row_in_progress() should return false - """ - return not isinstance(self.current_state, AWAITING_NEW_ROW) - def _handle_commit_row(self) -> "State": """ Complete row and move into AWAITING_ROW_CONSUME state @@ -216,6 +218,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": pass + class AWAITING_NEW_ROW(State): """ Default state @@ -308,7 +311,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("In progress cell had a timestamp") if chunk.labels: raise InvalidChunk("In progress cell had labels") - is_last = chunk.value_size == 0 + is_last = (chunk.value_size == 0) 
self._owner.adapter.cell_value(chunk.value) # transition to new state if not is_last: From 31ae397cb122518235c3f62abd2d02d3a940a1e6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 16:49:10 -0700 Subject: [PATCH 077/349] better documented state machine --- google/cloud/bigtable/row_merger.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 7098e0d5d..4a35c7139 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -223,7 +223,9 @@ class AWAITING_NEW_ROW(State): """ Default state Awaiting a chunk to start a new row - Exit states: any (depending on chunk) + Exit states: + - AWAITING_ROW_CONSUME: when last_scanned_row_key heartbeat is received + - AWAITING_NEW_CELL: when a chunk with a row_key is received """ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": @@ -249,7 +251,10 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": class AWAITING_NEW_CELL(State): """ Represents a cell boundary witin a row - Exit states: any (depending on chunk) + + Exit states: + - AWAITING_NEW_CELL: when the incoming cell is complete and ready for another + - AWAITING_CELL_VALUE: when the value is split across multiple chunks """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": @@ -296,7 +301,10 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": class AWAITING_CELL_VALUE(State): """ State that represents a split cell's continuation - Exit states: any (depending on chunk) + + Exit states: + - AWAITING_NEW_CELL: when the cell is complete + - AWAITING_CELL_VALUE: when additional value chunks are required """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": @@ -327,7 +335,8 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": class AWAITING_ROW_CONSUME(State): """ Represents a completed row. 
Prevents new rows being read until it is consumed - Exit states: AWAITING_NEW_ROW + Exit states: + - AWAITING_NEW_ROW: after the row is consumed """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": From 27f7f399c3a1c0352cc92a529e9ddbf7475c3c4f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 17:02:39 -0700 Subject: [PATCH 078/349] removed unneeded functions --- google/cloud/bigtable/row_merger.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 4a35c7139..7417a4ffc 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -229,9 +229,8 @@ class AWAITING_NEW_ROW(State): """ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - self._owner.complete_row = self._owner.adapter.create_scan_marker_row( - last_scanned_row_key - ) + scan_marker = RowResponse(last_scanned_row_key, []) + self._owner.complete_row = scan_marker return AWAITING_ROW_CONSUME(self._owner) def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": @@ -273,11 +272,10 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": self._owner.last_cell_data["labels"] = chunk.labels self._owner.last_cell_data["timestamp"] = chunk.timestamp_micros - # ensure that all chunks after the first one either are missing a row + # ensure that all chunks after the first one are either missing a row # key or the row is the same if ( - self._owner.adapter.row_in_progress() - and chunk.row_key + chunk.row_key is not None and chunk.row_key != self._owner.adapter.current_key ): raise InvalidChunk("row key changed mid row") @@ -355,26 +353,20 @@ class RowBuilder: At least 1 `cell_value` for each cell. Exactly 1 `finish_cell` for each cell. Exactly 1 `finish_row` for each row. - `create_scan_marker_row` can be called one or more times between `finish_row` and - `start_row`. `reset` can be called at any point and can be invoked multiple times in + `reset` can be called at any point and can be invoked multiple times in a row. """ def __init__(self): + # initialize state self.reset() - def row_in_progress(self) -> bool: - return self.current_key is not None - def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: bytes | None = None self.working_cell: Tuple[CellResponse, bytearray] | None = None self.completed_cells: List[CellResponse] = [] - def create_scan_marker_row(self, key: bytes) -> RowResponse: - """creates a special row to mark server progress before any data is received""" - return RowResponse(key, []) def start_row(self, key: bytes) -> None: """Called to start a new row. 
This will be called once per row""" From 86240212c019a321b764adc9210f014a19c27186 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 17:13:14 -0700 Subject: [PATCH 079/349] cleaning up RowBuilder --- google/cloud/bigtable/row_merger.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 7417a4ffc..7b672a516 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -364,12 +364,15 @@ def __init__(self): def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: bytes | None = None - self.working_cell: Tuple[CellResponse, bytearray] | None = None + self.working_cell: CellResponse | None = None + self.working_value: bytearray | None = None self.completed_cells: List[CellResponse] = [] def start_row(self, key: bytes) -> None: """Called to start a new row. This will be called once per row""" + if self.current_key is not None or self.working_cell is not None or self.working_value is not None or self.completed_cells: + raise InvalidChunk("start_row called without finishing previous row") self.current_key = key def start_cell( @@ -387,28 +390,23 @@ def start_cell( raise InvalidChunk("missing qualifier for a new cell") if self.current_key is None: raise InvalidChunk("no row in progress") - working_value = bytearray(size) - self.working_cell = ( - CellResponse(b"", self.current_key, family, qualifier, labels, timestamp), - working_value, - ) + self.working_value = bytearray(size) + self.working_cell = CellResponse(b"", self.current_key, family, qualifier, labels, timestamp) def cell_value(self, value: bytes) -> None: """called multiple times per cell to concatenate the cell value""" - if self.working_cell is None: + if self.working_value is None: raise InvalidChunk("cell value received before start_cell") - self.working_cell[1].extend(value) + self.working_value.extend(value) def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" - if self.working_cell is None: + if self.working_cell is None or self.working_value is None: raise InvalidChunk("cell value received before start_cell") - complete_cell, complete_value = self.working_cell - if not complete_value: - raise InvalidChunk("cell value was never set") - complete_cell.value = bytes(complete_value) - self.completed_cells.append(complete_cell) + self.working_cell.value = bytes(self.working_value) + self.completed_cells.append(self.working_cell) self.working_cell = None + self.working_value = None def finish_row(self) -> RowResponse: """called once per row to signal that all cells have been processed (unless reset)""" From 322805e7c659df38cf4da295b51406fa75af43ee Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 17:14:22 -0700 Subject: [PATCH 080/349] ran blacken --- google/cloud/bigtable/row_merger.py | 38 ++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 7b672a516..b4b282428 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -65,17 +65,25 @@ async def merge_row_stream( # read rows is complete, but there's still data in the merger raise RuntimeError("read_rows completed with partial state remaining") - async def _generator_to_cache(self, cache:asyncio.Queue[Any], input_generator: AsyncIterable[Any]) 
-> None: + async def _generator_to_cache( + self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] + ) -> None: async for item in input_generator: await cache.put(item) - async def merge_row_stream_with_cache(self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size:int|None=None) -> None: + async def merge_row_stream_with_cache( + self, + request_generator: AsyncIterable[ReadRowsResponse], + max_cache_size: int | None = None, + ) -> None: if max_cache_size is None: max_cache_size = -1 cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_cache_size) - stream_task = asyncio.create_task(self._generator_to_cache(cache, self.merge_row_stream(request_generator))) - # read from state machine and push into cache + stream_task = asyncio.create_task( + self._generator_to_cache(cache, self.merge_row_stream(request_generator)) + ) + # read from state machine and push into cache while not stream_task.done() or not cache.empty(): if not cache.empty(): yield await cache.get() @@ -92,7 +100,6 @@ async def merge_row_stream_with_cache(self, request_generator: AsyncIterable[Rea raise cast(Exception, stream_task.exception()) - class StateMachine: def __init__(self): self.completed_row_keys: Set[bytes] = set({}) @@ -140,7 +147,6 @@ def is_terminal_state(self) -> bool: """ return isinstance(self.current_state, AWAITING_NEW_ROW) - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: """ Called by RowMerger to notify the state machine of a scan heartbeat @@ -184,8 +190,8 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): Called by StateMachine when a reset_row flag is set on a chunk """ # ensure reset chunk matches expectations - if isinstance(self.current_state, AWAITING_NEW_ROW) or \ - isinstance(self.current_state, AWAITING_ROW_CONSUME + if isinstance(self.current_state, AWAITING_NEW_ROW) or isinstance( + self.current_state, AWAITING_ROW_CONSUME ): raise InvalidChunk("reset chunk received when not processing row") raise InvalidChunk("Bare reset") @@ -296,6 +302,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": raise InvalidChunk("Last scanned row key received in invalid state") + class AWAITING_CELL_VALUE(State): """ State that represents a split cell's continuation @@ -317,7 +324,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("In progress cell had a timestamp") if chunk.labels: raise InvalidChunk("In progress cell had labels") - is_last = (chunk.value_size == 0) + is_last = chunk.value_size == 0 self._owner.adapter.cell_value(chunk.value) # transition to new state if not is_last: @@ -330,6 +337,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": raise InvalidChunk("Last scanned row key received in invalid state") + class AWAITING_ROW_CONSUME(State): """ Represents a completed row. Prevents new rows being read until it is consumed @@ -368,10 +376,14 @@ def reset(self) -> None: self.working_value: bytearray | None = None self.completed_cells: List[CellResponse] = [] - def start_row(self, key: bytes) -> None: """Called to start a new row. 
This will be called once per row""" - if self.current_key is not None or self.working_cell is not None or self.working_value is not None or self.completed_cells: + if ( + self.current_key is not None + or self.working_cell is not None + or self.working_value is not None + or self.completed_cells + ): raise InvalidChunk("start_row called without finishing previous row") self.current_key = key @@ -391,7 +403,9 @@ def start_cell( if self.current_key is None: raise InvalidChunk("no row in progress") self.working_value = bytearray(size) - self.working_cell = CellResponse(b"", self.current_key, family, qualifier, labels, timestamp) + self.working_cell = CellResponse( + b"", self.current_key, family, qualifier, labels, timestamp + ) def cell_value(self, value: bytes) -> None: """called multiple times per cell to concatenate the cell value""" From 98f6d89a11ffa26271c2fbdf1f15d2472a3d540d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 22 Mar 2023 17:15:48 -0700 Subject: [PATCH 081/349] made handle_last_scanned concrete --- google/cloud/bigtable/row_merger.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b4b282428..77b0e7c97 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -220,10 +220,8 @@ def __init__(self, owner: StateMachine): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": pass - @abstractmethod def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - pass - + raise InvalidChunk("Last scanned row key received in invalid state") class AWAITING_NEW_ROW(State): """ @@ -299,9 +297,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": self._owner.adapter.finish_cell() return AWAITING_NEW_CELL(self._owner) - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - raise InvalidChunk("Last scanned row key received in invalid state") - class AWAITING_CELL_VALUE(State): """ @@ -334,9 +329,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": self._owner.adapter.finish_cell() return AWAITING_NEW_CELL(self._owner) - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - raise InvalidChunk("Last scanned row key received in invalid state") - class AWAITING_ROW_CONSUME(State): """ @@ -348,9 +340,6 @@ class AWAITING_ROW_CONSUME(State): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("Row is complete. Must consume row before reading more") - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - raise InvalidChunk("Row is complete. Must consume row before reading more") - class RowBuilder: """ From 64162e3ec2febad2c9b27ea61ffdd7d5753987b0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 11:34:46 -0700 Subject: [PATCH 082/349] moved last scanned row out of states. 
Handled in StateMachine --- google/cloud/bigtable/row_merger.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 77b0e7c97..fa9f9972e 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -153,10 +153,12 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: """ if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: raise InvalidChunk("Last scanned row key is out of order") + if not isinstance(self.current_state, AWAITING_NEW_ROW): + raise InvalidChunk("Last scanned row key received in invalid state") self.last_scanned_row_key = last_scanned_row_key - self.current_state = self.current_state.handle_last_scanned_row( - last_scanned_row_key - ) + scan_marker = RowResponse(last_scanned_row_key, []) + self.complete_row = scan_marker + self.current_state = AWAITING_ROW_CONSUME(self) def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: """ @@ -220,8 +222,6 @@ def __init__(self, owner: StateMachine): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": pass - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - raise InvalidChunk("Last scanned row key received in invalid state") class AWAITING_NEW_ROW(State): """ @@ -232,11 +232,6 @@ class AWAITING_NEW_ROW(State): - AWAITING_NEW_CELL: when a chunk with a row_key is received """ - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> "State": - scan_marker = RowResponse(last_scanned_row_key, []) - self._owner.complete_row = scan_marker - return AWAITING_ROW_CONSUME(self._owner) - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": if not chunk.row_key: raise InvalidChunk("New row is missing a row key") From 47fa07138829e46b58f218dca8cc72d320d39b08 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 11:48:22 -0700 Subject: [PATCH 083/349] renamed statemachine.reset to _reset_row --- google/cloud/bigtable/row_merger.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index fa9f9972e..b7ef404c8 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -104,9 +104,12 @@ class StateMachine: def __init__(self): self.completed_row_keys: Set[bytes] = set({}) self.adapter: "RowBuilder" = RowBuilder() - self.reset() + self._reset_row() - def reset(self) -> None: + def _reset_row(self) -> None: + """ + Drops the current row and transitions to AWAITING_NEW_ROW to start a fresh one + """ self.current_state: State = AWAITING_NEW_ROW(self) self.last_cell_data: Dict[str, Any] = {} # represents either the last row emitted, or the last_scanned_key sent from backend @@ -134,7 +137,7 @@ def consume_row(self) -> RowResponse: if not self.row_ready() or self.complete_row is None: raise RuntimeError("No row to consume") row = self.complete_row - self.reset() + self._reset_row() self.completed_row_keys.add(row.row_key) return row @@ -209,7 +212,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk has labels") if chunk.value: raise InvalidChunk("Reset chunk has a value") - self.reset() + self._reset_row() if not isinstance(self.current_state, AWAITING_NEW_ROW): raise RuntimeError("Failed to reset state machine") From 535f747104dcd42a642ff6015a748c3b95ce04c1 Mon Sep 17 00:00:00 2001 From: Daniel 
Sanche Date: Thu, 23 Mar 2023 12:06:33 -0700 Subject: [PATCH 084/349] added doscstrings --- google/cloud/bigtable/row_merger.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b7ef404c8..6b501f314 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -37,6 +37,16 @@ class InvalidChunk(RuntimeError): class RowMerger: + """ + RowMerger takes in a stream of ReadRows chunks + and processes them into a stream of RowResponses. + + RowMerger can wrap the stream directly, or use a cache to decouple + the producer from the consumer + + RowMerger uses a StateMachine instance to handle the chunk parsing + logic + """ def __init__(self): self.state_machine: StateMachine = StateMachine() @@ -101,6 +111,19 @@ async def merge_row_stream_with_cache( class StateMachine: + """ + State Machine converts chunks into RowResponses + + Chunks are added to the state machine via handle_chunk, which + transitions the state machine through the various states. + + When a row is complete, the state machine will transition to + AWAITING_ROW_CONSUME, and wait there until consume_row is called, + at which point it will transition back to AWAITING_NEW_ROW + + If an unexpected chunk is received for the current state, + the state machine will raise an InvalidChunk exception + """ def __init__(self): self.completed_row_keys: Set[bytes] = set({}) self.adapter: "RowBuilder" = RowBuilder() @@ -218,6 +241,12 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): class State(ABC): + """ + Represents a state the state machine can be in + + Each state is responsible for handling the next chunk, and then + transitioning to the next state + """ def __init__(self, owner: StateMachine): self.owner = owner From 009285b5e4e85ba3ced62aec2b7d366f4a643252 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:00:52 -0700 Subject: [PATCH 085/349] removed consume state in favor of returning rows from handle_chunk --- google/cloud/bigtable/row_merger.py | 89 ++++++++++------------------- 1 file changed, 29 insertions(+), 60 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 6b501f314..a22b5aac7 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -63,14 +63,12 @@ async def merge_row_stream( last_scanned = row_response.last_scanned_row_key # if the server sends a scan heartbeat, notify the state machine. if last_scanned: - self.state_machine.handle_last_scanned_row(last_scanned) - if self.state_machine.row_ready(): - yield self.state_machine.consume_row() + yield self.state_machine.handle_last_scanned_row(last_scanned) # process new chunks through the state machine. for chunk in row_response.chunks: - self.state_machine.handle_chunk(chunk) - if self.state_machine.row_ready(): - yield self.state_machine.consume_row() + complete_row = self.state_machine.handle_chunk(chunk) + if complete_row is not None: + yield complete_row if not self.state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise RuntimeError("read_rows completed with partial state remaining") @@ -117,9 +115,8 @@ class StateMachine: Chunks are added to the state machine via handle_chunk, which transitions the state machine through the various states. 
- When a row is complete, the state machine will transition to - AWAITING_ROW_CONSUME, and wait there until consume_row is called, - at which point it will transition back to AWAITING_NEW_ROW + When a row is complete, it will be returned from handle_chunk, + and the state machine will reset to AWAITING_NEW_ROW If an unexpected chunk is received for the current state, the state machine will raise an InvalidChunk exception @@ -140,30 +137,9 @@ def _reset_row(self) -> None: self.last_seen_row_key: bytes | None = None # self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 - self.complete_row: RowResponse | None = None # self.num_cells_in_row:int = 0 self.adapter.reset() - def row_ready(self) -> bool: - """ - Returns True if the state machine has a complete row ready to consume - """ - return ( - isinstance(self.current_state, AWAITING_ROW_CONSUME) - and self.complete_row is not None - ) - - def consume_row(self) -> RowResponse: - """ - Returns the last completed row and transitions to a new row - """ - if not self.row_ready() or self.complete_row is None: - raise RuntimeError("No row to consume") - row = self.complete_row - self._reset_row() - self.completed_row_keys.add(row.row_key) - return row - def is_terminal_state(self) -> bool: """ Returns true if the state machine is in a terminal state (AWAITING_NEW_ROW) @@ -173,22 +149,25 @@ def is_terminal_state(self) -> bool: """ return isinstance(self.current_state, AWAITING_NEW_ROW) - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> None: + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> RowResponse: """ Called by RowMerger to notify the state machine of a scan heartbeat + + Returns an empty row with the last_scanned_row_key """ if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: raise InvalidChunk("Last scanned row key is out of order") if not isinstance(self.current_state, AWAITING_NEW_ROW): raise InvalidChunk("Last scanned row key received in invalid state") - self.last_scanned_row_key = last_scanned_row_key scan_marker = RowResponse(last_scanned_row_key, []) - self.complete_row = scan_marker - self.current_state = AWAITING_ROW_CONSUME(self) + self._handle_complete_row(scan_marker) + return scan_marker - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: """ Called by RowMerger to process a new chunk + + Returns a RowResponse if the chunk completes a row, otherwise returns None """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") @@ -197,19 +176,24 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> None: else: self.current_state = self.current_state.handle_chunk(chunk) if chunk.commit_row: - self._handle_commit_row(chunk) + if not isinstance(self.current_state, AWAITING_NEW_CELL): + raise InvalidChunk("commit row attempted without finishing cell") + complete_row = self.adapter.finish_row() + self._handle_complete_row(complete_row) + return complete_row + else: + return None - def _handle_commit_row(self) -> "State": + def _handle_complete_row(self, complete_row:RowResponse) -> None: """ - Complete row and move into AWAITING_ROW_CONSUME state + Complete row, update seen keys, and move back to AWAITING_NEW_ROW - Called by StateMachine when a commit_row flag is set on a chunk + Called by StateMachine when a commit_row flag is set on a chunk, + or when a scan heartbeat is received """ - if not 
isinstance(self.current_state, AWAITING_NEW_CELL): - raise InvalidChunk("commit row attempted without finishing cell") - self.complete_row = self.adapter.finish_row() - self.last_seen_row_key = self.complete_row.row_key - return AWAITING_ROW_CONSUME(self) + self.last_seen_row_key = complete_row.row_key + self.completed_row_keys.add(complete_row.row_key) + self._reset_row() def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): """ @@ -218,11 +202,8 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): Called by StateMachine when a reset_row flag is set on a chunk """ # ensure reset chunk matches expectations - if isinstance(self.current_state, AWAITING_NEW_ROW) or isinstance( - self.current_state, AWAITING_ROW_CONSUME - ): + if isinstance(self.current_state, AWAITING_NEW_ROW): raise InvalidChunk("reset chunk received when not processing row") - raise InvalidChunk("Bare reset") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") if "family_name" in chunk: @@ -260,7 +241,6 @@ class AWAITING_NEW_ROW(State): Default state Awaiting a chunk to start a new row Exit states: - - AWAITING_ROW_CONSUME: when last_scanned_row_key heartbeat is received - AWAITING_NEW_CELL: when a chunk with a row_key is received """ @@ -357,17 +337,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": return AWAITING_NEW_CELL(self._owner) -class AWAITING_ROW_CONSUME(State): - """ - Represents a completed row. Prevents new rows being read until it is consumed - Exit states: - - AWAITING_NEW_ROW: after the row is consumed - """ - - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": - raise InvalidChunk("Row is complete. Must consume row before reading more") - - class RowBuilder: """ called by state machine to build rows From 08c8bb23ee8061c54898531eede4b4edfdf9e750 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:08:26 -0700 Subject: [PATCH 086/349] fixed bug in last scanned row key; moved out of reset_row --- google/cloud/bigtable/row_merger.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a22b5aac7..3c067717f 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -123,6 +123,9 @@ class StateMachine: """ def __init__(self): self.completed_row_keys: Set[bytes] = set({}) + # represents either the last row emitted, or the last_scanned_key sent from backend + # all future rows should have keys > last_seen_row_key + self.last_seen_row_key: bytes | None = None self.adapter: "RowBuilder" = RowBuilder() self._reset_row() @@ -132,9 +135,6 @@ def _reset_row(self) -> None: """ self.current_state: State = AWAITING_NEW_ROW(self) self.last_cell_data: Dict[str, Any] = {} - # represents either the last row emitted, or the last_scanned_key sent from backend - # all future rows should have keys > last_seen_row_key - self.last_seen_row_key: bytes | None = None # self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 # self.num_cells_in_row:int = 0 @@ -171,6 +171,8 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") + if self.last_seen_row_key and self.last_seen_row_key >= chunk.row_key: + raise InvalidChunk("Out of order row keys") if chunk.reset_row: self._handle_reset_row(chunk) else: @@ -247,11 +249,6 @@ class 
AWAITING_NEW_ROW(State): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": if not chunk.row_key: raise InvalidChunk("New row is missing a row key") - if ( - self._owner.last_seen_row_key - and self._owner.last_seen_row_key >= chunk.row_key - ): - raise InvalidChunk("Out of order row keys") self._owner.adapter.start_row(chunk.row_key) # the first chunk signals both the start of a new row and the start of a new cell, so # force the chunk processing in the AWAITING_CELL_VALUE. From 7e85044301cfd7d645a03883dce801892acbaa48 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:08:44 -0700 Subject: [PATCH 087/349] added comments --- google/cloud/bigtable/row_merger.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 3c067717f..ba5197842 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -174,16 +174,20 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: if self.last_seen_row_key and self.last_seen_row_key >= chunk.row_key: raise InvalidChunk("Out of order row keys") if chunk.reset_row: + # reset row if requested self._handle_reset_row(chunk) else: + # otherwise, process the chunk and update the state self.current_state = self.current_state.handle_chunk(chunk) if chunk.commit_row: + # check if row is complete, and return it if so if not isinstance(self.current_state, AWAITING_NEW_CELL): raise InvalidChunk("commit row attempted without finishing cell") complete_row = self.adapter.finish_row() self._handle_complete_row(complete_row) return complete_row else: + # row is not complete, return None return None def _handle_complete_row(self, complete_row:RowResponse) -> None: From 81b4584572d8ba5b408265596286125a75cb0b3c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:19:57 -0700 Subject: [PATCH 088/349] simplified cell data tracking --- google/cloud/bigtable/row_merger.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ba5197842..ec7cb1f91 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -134,7 +134,8 @@ def _reset_row(self) -> None: Drops the current row and transitions to AWAITING_NEW_ROW to start a fresh one """ self.current_state: State = AWAITING_NEW_ROW(self) - self.last_cell_data: Dict[str, Any] = {} + self.current_family : bytes | None = None + self.current_qualifier : bytes | None = None # self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 # self.num_cells_in_row:int = 0 @@ -274,15 +275,13 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": expected_cell_size = chunk.value_size if is_split else chunk_size # track latest cell data. 
New chunks won't send repeated data if chunk.family_name: - self._owner.last_cell_data["family"] = chunk.family_name + self._owner.current_family = chunk.family_name if not chunk.qualifier: raise InvalidChunk("new column family must specify qualifier") if chunk.qualifier: - self._owner.last_cell_data["qualifier"] = chunk.qualifier - if not self._owner.last_cell_data.get("family", False): + self._owner.current_qualifier = chunk.qualifier + if self._owner.current_family is None: raise InvalidChunk("family not found") - self._owner.last_cell_data["labels"] = chunk.labels - self._owner.last_cell_data["timestamp"] = chunk.timestamp_micros # ensure that all chunks after the first one are either missing a row # key or the row is the same @@ -293,7 +292,10 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": raise InvalidChunk("row key changed mid row") self._owner.adapter.start_cell( - **self._owner.last_cell_data, + family=self._owner.current_family, + qualifier=self._owner.current_qualifier, + labels=chunk.labels, + timestamp=chunk.timestamp_micros, size=expected_cell_size, ) self._owner.adapter.cell_value(chunk.value) From 927f4013893fc3e7031c61e81a4e25f54dc48b7f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:23:46 -0700 Subject: [PATCH 089/349] fixed timestamp format error --- google/cloud/bigtable/row_merger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ec7cb1f91..6556165a0 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -379,7 +379,7 @@ def start_cell( self, family: str, qualifier: bytes, - timestamp: int, + timestamp_micros: int, labels: List[str], size: int, ) -> None: @@ -391,8 +391,9 @@ def start_cell( if self.current_key is None: raise InvalidChunk("no row in progress") self.working_value = bytearray(size) + timestamp_nanos = timestamp_micros * 1000 self.working_cell = CellResponse( - b"", self.current_key, family, qualifier, labels, timestamp + b"", self.current_key, family, qualifier, labels, timestamp_nanos ) def cell_value(self, value: bytes) -> None: From 8e343e6411de605702ee350c5357e1fc5af31f7d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:29:33 -0700 Subject: [PATCH 090/349] fixed bugs and ran blacken --- google/cloud/bigtable/row_merger.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 6556165a0..b23003061 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -47,6 +47,7 @@ class RowMerger: RowMerger uses a StateMachine instance to handle the chunk parsing logic """ + def __init__(self): self.state_machine: StateMachine = StateMachine() @@ -121,6 +122,7 @@ class StateMachine: If an unexpected chunk is received for the current state, the state machine will raise an InvalidChunk exception """ + def __init__(self): self.completed_row_keys: Set[bytes] = set({}) # represents either the last row emitted, or the last_scanned_key sent from backend @@ -134,8 +136,8 @@ def _reset_row(self) -> None: Drops the current row and transitions to AWAITING_NEW_ROW to start a fresh one """ self.current_state: State = AWAITING_NEW_ROW(self) - self.current_family : bytes | None = None - self.current_qualifier : bytes | None = None + self.current_family: str | None = None + self.current_qualifier: bytes | None = None # 
self.expected_cell_size:int = 0 # self.remaining_cell_bytes:int = 0 # self.num_cells_in_row:int = 0 @@ -191,7 +193,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: # row is not complete, return None return None - def _handle_complete_row(self, complete_row:RowResponse) -> None: + def _handle_complete_row(self, complete_row: RowResponse) -> None: """ Complete row, update seen keys, and move back to AWAITING_NEW_ROW @@ -235,6 +237,7 @@ class State(ABC): Each state is responsible for handling the next chunk, and then transitioning to the next state """ + def __init__(self, owner: StateMachine): self.owner = owner @@ -295,7 +298,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": family=self._owner.current_family, qualifier=self._owner.current_qualifier, labels=chunk.labels, - timestamp=chunk.timestamp_micros, + timestamp_micros=chunk.timestamp_micros, size=expected_cell_size, ) self._owner.adapter.cell_value(chunk.value) From f7fff852a62bfb583efea9e1e50163b779b0dff7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:36:12 -0700 Subject: [PATCH 091/349] fixed some mypy issues --- google/cloud/bigtable/row_merger.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b23003061..44e258631 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -84,7 +84,7 @@ async def merge_row_stream_with_cache( self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, - ) -> None: + ) -> AsyncGenerator[RowResponse, None]: if max_cache_size is None: max_cache_size = -1 cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_cache_size) @@ -178,7 +178,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: raise InvalidChunk("Out of order row keys") if chunk.reset_row: # reset row if requested - self._handle_reset_row(chunk) + self._handle_reset_chunk(chunk) else: # otherwise, process the chunk and update the state self.current_state = self.current_state.handle_chunk(chunk) @@ -239,7 +239,7 @@ class State(ABC): """ def __init__(self, owner: StateMachine): - self.owner = owner + self._owner = owner @abstractmethod def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": @@ -297,7 +297,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": self._owner.adapter.start_cell( family=self._owner.current_family, qualifier=self._owner.current_qualifier, - labels=chunk.labels, + labels=list(chunk.labels), timestamp_micros=chunk.timestamp_micros, size=expected_cell_size, ) @@ -380,8 +380,8 @@ def start_row(self, key: bytes) -> None: def start_cell( self, - family: str, - qualifier: bytes, + family: str|None, + qualifier: bytes|None, timestamp_micros: int, labels: List[str], size: int, From d40128d2a61cb31570d999b3e3c9b78b54cb4052 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 13:55:58 -0700 Subject: [PATCH 092/349] fixed failing tests --- tests/unit/test_read_rows_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index 569e97f17..2f0261eba 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -91,7 +91,7 @@ def test_set_filter_dict(self): output = query.to_dict() self.assertEqual(output["filter"], filter1_dict) proto_output = ReadRowsRequest(**output) 
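Note: the merge_row_stream_with_cache signature fixed above hands rows from the gRPC stream to the consumer through a bounded asyncio.Queue. A rough standalone sketch of that producer/consumer hand-off follows; the function and variable names here are illustrative only, not the library's API.

import asyncio
from typing import AsyncGenerator, AsyncIterable


async def buffered(
    source: AsyncIterable[bytes], max_cache_size: int = 10
) -> AsyncGenerator[bytes, None]:
    """Yield items from `source`, staging at most `max_cache_size` of them."""
    queue: asyncio.Queue = asyncio.Queue(max_cache_size)

    async def producer() -> None:
        async for item in source:
            await queue.put(item)  # blocks while the cache is full
        await queue.put(None)  # sentinel marking the end of the stream

    task = asyncio.create_task(producer())
    try:
        while (item := await queue.get()) is not None:
            yield item
    finally:
        task.cancel()


async def main() -> None:
    async def fake_rows() -> AsyncGenerator[bytes, None]:
        for key in (b"a", b"b", b"c"):
            yield key

    async for row_key in buffered(fake_rows(), max_cache_size=2):
        print(row_key)


asyncio.run(main())

The bounded queue gives backpressure for free: the producer task stalls on put() whenever the consumer falls behind by more than the cache size.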
- self.assertEqual(proto_output.filter, filter1.to_pb()) + self.assertEqual(proto_output.filter, filter1._to_pb()) query.filter = None self.assertEqual(query.filter, None) @@ -308,7 +308,7 @@ def test_to_dict_rows_populated(self): self.assertEqual(request_proto.rows_limit, 100) # check filter filter_proto = request_proto.filter - self.assertEqual(filter_proto, row_filter.to_pb()) + self.assertEqual(filter_proto, row_filter._to_pb()) def test_shard(self): pass From 96d30b6137b4f329c3d4d64b37bddee752db706d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 14:43:40 -0700 Subject: [PATCH 093/349] changed tests for refactored read_rows --- google/cloud/bigtable/row_merger.py | 6 +- tests/unit/test_read_rows.py | 85 +++++++++++++++++------------ 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 44e258631..ac2e066ac 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -23,12 +23,10 @@ from typing import ( cast, List, - Dict, Set, Any, AsyncIterable, AsyncGenerator, - Tuple, ) @@ -380,8 +378,8 @@ def start_row(self, key: bytes) -> None: def start_cell( self, - family: str|None, - qualifier: bytes|None, + family: str | None, + qualifier: bytes | None, timestamp_micros: int, labels: List[str], size: int, diff --git a/tests/unit/test_read_rows.py b/tests/unit/test_read_rows.py index c6d1566fe..3276111df 100644 --- a/tests/unit/test_read_rows.py +++ b/tests/unit/test_read_rows.py @@ -63,17 +63,18 @@ def extract_results_from_row(row: RowResponse): @pytest.mark.parametrize( "test_case", parse_readrows_acceptance_tests(), ids=lambda t: t.description ) -def test_scenario(test_case: ReadRowsTest): - results = [] +@pytest.mark.asyncio +async def test_scenario(test_case: ReadRowsTest): + async def _scenerio_stream(): + for chunk in test_case.chunks: + yield ReadRowsResponse(chunks=[chunk]) + try: merger = RowMerger() - for chunk in test_case.chunks: - req = ReadRowsResponse.pb(ReadRowsResponse(chunks=[chunk])) - merger.push(req) - if merger.has_full_frame(): - row = merger.pop() - results.extend(extract_results_from_row(row)) - if merger.has_partial_frame(): + results = [] + async for row in merger.merge_row_stream(_scenerio_stream()): + results.append(row) + if not merger.state_machine.is_terminal_state(): raise InvalidChunk("merger has partial frame after reading") except InvalidChunk: results.append(ReadRowsTest.Result(error=True)) @@ -90,57 +91,62 @@ def test_scenario(test_case: ReadRowsTest): # breakpoint() -def test_out_of_order_rows(): +@pytest.mark.asyncio +async def test_out_of_order_rows(): + async def _row_stream(): + yield ReadRowsResponse(last_scanned_row_key=b"a") + merger = RowMerger() merger.state_machine.last_seen_row_key = b"a" - req = ReadRowsResponse(last_scanned_row_key=b"a") with pytest.raises(InvalidChunk): - merger.push(req) + async for _ in merger.merge_row_stream(_row_stream()): + pass -def test_bare_reset(): +@pytest.mark.asyncio +async def test_bare_reset(): first_chunk = ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", qualifier=b"q", value=b"v" ) ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, row_key=b"a") ), ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, 
family_name="f") ), ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, qualifier=b"q") ), ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, timestamp_micros=1000) ), ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, labels=["a"]) ), ) with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( first_chunk, ReadRowsResponse.CellChunk( ReadRowsResponse.CellChunk(reset_row=True, value=b"v") @@ -148,9 +154,10 @@ def test_bare_reset(): ) -def test_missing_family(): +@pytest.mark.asyncio +async def test_missing_family(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", qualifier=b"q", @@ -161,9 +168,10 @@ def test_missing_family(): ) -def test_mid_cell_row_key_change(): +@pytest.mark.asyncio +async def test_mid_cell_row_key_change(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", @@ -176,9 +184,10 @@ def test_mid_cell_row_key_change(): ) -def test_mid_cell_family_change(): +@pytest.mark.asyncio +async def test_mid_cell_family_change(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", @@ -191,9 +200,10 @@ def test_mid_cell_family_change(): ) -def test_mid_cell_qualifier_change(): +@pytest.mark.asyncio +async def test_mid_cell_qualifier_change(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", @@ -206,9 +216,10 @@ def test_mid_cell_qualifier_change(): ) -def test_mid_cell_timestamp_change(): +@pytest.mark.asyncio +async def test_mid_cell_timestamp_change(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", @@ -223,9 +234,10 @@ def test_mid_cell_timestamp_change(): ) -def test_mid_cell_labels_change(): +@pytest.mark.asyncio +async def test_mid_cell_labels_change(): with pytest.raises(InvalidChunk): - _process_chunks( + await _process_chunks( ReadRowsResponse.CellChunk( row_key=b"a", family_name="f", @@ -238,7 +250,12 @@ def test_mid_cell_labels_change(): ) -def _process_chunks(*chunks): - req = ReadRowsResponse.pb(ReadRowsResponse(chunks=chunks)) +async def _process_chunks(*chunks): + async def _row_stream(): + yield ReadRowsResponse(chunks=chunks) + merger = RowMerger() - merger.push(req) + results = [] + async for row in merger.merge_row_stream(_row_stream()): + results.append(row) + return results From d4904d75e45cf0bbb8b620832ae789a701556e2c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 16:29:20 -0700 Subject: [PATCH 094/349] changed generation arguments --- gapic-generator-fork | 2 +- .../services/bigtable/async_client.py | 196 +----------------- .../bigtable_v2/services/bigtable/client.py | 195 +---------------- .../services/bigtable/transports/base.py | 10 - .../services/bigtable/transports/rest.py | 117 +++++------ tests/unit/gapic/bigtable_v2/test_bigtable.py | 90 +++++--- 6 files changed, 123 insertions(+), 487 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 
b64e7a335..78be45a70 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit b64e7a3351804613aaf56d7a699390d8dbc70712 +Subproject commit 78be45a70d87092acdef9d32f12bfc79d2e46941 diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index 2cdc638e0..e5a0b13e2 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -229,33 +229,6 @@ def read_rows( each row will still be preserved. See the ReadRowsResponse documentation for details. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_read_rows(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.ReadRowsRequest( - table_name="table_name_value", - ) - - # Make the request - stream = await client.read_rows(request=request) - - # Handle the response - async for response in stream: - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadRowsRequest, dict]]): The request object. Request message for @@ -349,33 +322,6 @@ def sample_row_keys( to break up the data for distributed tasks like mapreduces. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_sample_row_keys(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.SampleRowKeysRequest( - table_name="table_name_value", - ) - - # Make the request - stream = await client.sample_row_keys(request=request) - - # Handle the response - async for response in stream: - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.SampleRowKeysRequest, dict]]): The request object. Request message for @@ -470,33 +416,6 @@ async def mutate_row( r"""Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. 
- # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_mutate_row(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.MutateRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - ) - - # Make the request - response = await client.mutate_row(request=request) - - # Handle the response - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.MutateRowRequest, dict]]): The request object. Request message for @@ -613,33 +532,6 @@ def mutate_rows( is mutated atomically as in MutateRow, but the entire batch is not executed atomically. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_mutate_rows(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.MutateRowsRequest( - table_name="table_name_value", - ) - - # Make the request - stream = await client.mutate_rows(request=request) - - # Handle the response - async for response in stream: - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.MutateRowsRequest, dict]]): The request object. Request message for @@ -752,33 +644,6 @@ async def check_and_mutate_row( r"""Mutates a row atomically based on the output of a predicate Reader filter. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_check_and_mutate_row(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.CheckAndMutateRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - ) - - # Make the request - response = await client.check_and_mutate_row(request=request) - - # Handle the response - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.CheckAndMutateRowRequest, dict]]): The request object. Request message for @@ -930,32 +795,6 @@ async def ping_and_warm( connection. This call is not required but may be useful for connection keep-alive. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. 
- # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_ping_and_warm(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - request = bigtable_v2.PingAndWarmRequest( - name="name_value", - ) - - # Make the request - response = await client.ping_and_warm(request=request) - - # Handle the response - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]]): The request object. Request message for client @@ -1054,37 +893,6 @@ async def read_modify_write_row( or the current server time. The method returns the new contents of all modified cells. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - async def sample_read_modify_write_row(): - # Create a client - client = bigtable_v2.BigtableAsyncClient() - - # Initialize request argument(s) - rules = bigtable_v2.ReadModifyWriteRule() - rules.append_value = b'append_value_blob' - - request = bigtable_v2.ReadModifyWriteRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - rules=rules, - ) - - # Make the request - response = await client.read_modify_write_row(request=request) - - # Handle the response - print(response) - Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadModifyWriteRowRequest, dict]]): The request object. Request message for @@ -1268,7 +1076,7 @@ def generate_initial_change_stream_partitions( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.generate_initial_change_stream_partitions, - default_timeout=60.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) @@ -1366,7 +1174,7 @@ def read_change_stream( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_change_stream, - default_timeout=43200.0, + default_timeout=None, client_info=DEFAULT_CLIENT_INFO, ) diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index ae90e67cf..3165f9160 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -382,6 +382,9 @@ def __init__( transport (Union[str, BigtableTransport]): The transport to use. If set to None, a transport is chosen automatically. + NOTE: "rest" transport functionality is currently in a + beta state (preview). We welcome your feedback via an + issue in this library's source repository. client_options (Optional[Union[google.api_core.client_options.ClientOptions, dict]]): Custom options for the client. It won't take effect if a ``transport`` instance is provided. (1) The ``api_endpoint`` property can be used to override the @@ -480,33 +483,6 @@ def read_rows( each row will still be preserved. See the ReadRowsResponse documentation for details. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. 
- # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_read_rows(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.ReadRowsRequest( - table_name="table_name_value", - ) - - # Make the request - stream = client.read_rows(request=request) - - # Handle the response - for response in stream: - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.ReadRowsRequest, dict]): The request object. Request message for @@ -609,33 +585,6 @@ def sample_row_keys( to break up the data for distributed tasks like mapreduces. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_sample_row_keys(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.SampleRowKeysRequest( - table_name="table_name_value", - ) - - # Make the request - stream = client.sample_row_keys(request=request) - - # Handle the response - for response in stream: - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.SampleRowKeysRequest, dict]): The request object. Request message for @@ -739,33 +688,6 @@ def mutate_row( r"""Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_mutate_row(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.MutateRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - ) - - # Make the request - response = client.mutate_row(request=request) - - # Handle the response - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.MutateRowRequest, dict]): The request object. Request message for @@ -891,33 +813,6 @@ def mutate_rows( is mutated atomically as in MutateRow, but the entire batch is not executed atomically. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. 
- # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_mutate_rows(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.MutateRowsRequest( - table_name="table_name_value", - ) - - # Make the request - stream = client.mutate_rows(request=request) - - # Handle the response - for response in stream: - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.MutateRowsRequest, dict]): The request object. Request message for @@ -1039,33 +934,6 @@ def check_and_mutate_row( r"""Mutates a row atomically based on the output of a predicate Reader filter. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_check_and_mutate_row(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.CheckAndMutateRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - ) - - # Make the request - response = client.check_and_mutate_row(request=request) - - # Handle the response - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.CheckAndMutateRowRequest, dict]): The request object. Request message for @@ -1226,32 +1094,6 @@ def ping_and_warm( connection. This call is not required but may be useful for connection keep-alive. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. - # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_ping_and_warm(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - request = bigtable_v2.PingAndWarmRequest( - name="name_value", - ) - - # Make the request - response = client.ping_and_warm(request=request) - - # Handle the response - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]): The request object. Request message for client @@ -1359,37 +1201,6 @@ def read_modify_write_row( or the current server time. The method returns the new contents of all modified cells. - .. code-block:: python - - # This snippet has been automatically generated and should be regarded as a - # code template only. - # It will require modifications to work: - # - It may require correct/in-range values for request initialization. 
- # - It may require specifying regional endpoints when creating the service - # client as shown in: - # https://googleapis.dev/python/google-api-core/latest/client_options.html - from google import bigtable_v2 - - def sample_read_modify_write_row(): - # Create a client - client = bigtable_v2.BigtableClient() - - # Initialize request argument(s) - rules = bigtable_v2.ReadModifyWriteRule() - rules.append_value = b'append_value_blob' - - request = bigtable_v2.ReadModifyWriteRowRequest( - table_name="table_name_value", - row_key=b'row_key_blob', - rules=rules, - ) - - # Make the request - response = client.read_modify_write_row(request=request) - - # Handle the response - print(response) - Args: request (Union[google.cloud.bigtable_v2.types.ReadModifyWriteRowRequest, dict]): The request object. Request message for diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/base.py b/google/cloud/bigtable_v2/services/bigtable/transports/base.py index 884bb1275..5879a63cb 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/base.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/base.py @@ -175,16 +175,6 @@ def _prep_wrapped_messages(self, client_info): default_timeout=None, client_info=client_info, ), - self.generate_initial_change_stream_partitions: gapic_v1.method.wrap_method( - self.generate_initial_change_stream_partitions, - default_timeout=60.0, - client_info=client_info, - ), - self.read_change_stream: gapic_v1.method.wrap_method( - self.read_change_stream, - default_timeout=43200.0, - client_info=client_info, - ), } def close(self): diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index ee9cb046f..6c786f6b3 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -365,6 +365,9 @@ class BigtableRestTransport(BigtableTransport): It sends JSON representations of protocol buffers over HTTP/1.1 + NOTE: This REST transport functionality is currently in a beta + state (preview). We welcome your feedback via an issue in this + library's source repository. Thank you! """ def __init__( @@ -384,35 +387,39 @@ def __init__( ) -> None: """Instantiate the transport. - Args: - host (Optional[str]): - The hostname to connect to. - credentials (Optional[google.auth.credentials.Credentials]): The - authorization credentials to attach to requests. These - credentials identify the application to the service; if none - are specified, the client will attempt to ascertain the - credentials from the environment. - - credentials_file (Optional[str]): A file with credentials that can - be loaded with :func:`google.auth.load_credentials_from_file`. - This argument is ignored if ``channel`` is provided. - scopes (Optional(Sequence[str])): A list of scopes. This argument is - ignored if ``channel`` is provided. - client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client - certificate to configure mutual TLS HTTP channel. It is ignored - if ``channel`` is provided. - quota_project_id (Optional[str]): An optional project to use for billing - and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you are developing - your own client library. 
- always_use_jwt_access (Optional[bool]): Whether self signed JWT should - be used for service account credentials. - url_scheme: the protocol scheme for the API endpoint. Normally - "https", but for testing or local servers, - "http" can be specified. + NOTE: This REST transport functionality is currently in a beta + state (preview). We welcome your feedback via a GitHub issue in + this library's repository. Thank you! + + Args: + host (Optional[str]): + The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client + certificate to configure mutual TLS HTTP channel. It is ignored + if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you are developing + your own client library. + always_use_jwt_access (Optional[bool]): Whether self signed JWT should + be used for service account credentials. + url_scheme: the protocol scheme for the API endpoint. Normally + "https", but for testing or local servers, + "http" can be specified. """ # Run the base constructor # TODO(yon-mg): resolve other ctor params i.e. scopes, quota, etc. 
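Note: the transport constructor documented above is normally reached through the client rather than instantiated directly. A minimal sketch, assuming the public BigtableClient export and using anonymous credentials purely to keep it self-contained:

from google.auth.credentials import AnonymousCredentials
from google.cloud.bigtable_v2 import BigtableClient

# Anonymous credentials keep the sketch runnable offline; real code would rely
# on Application Default Credentials and omit the argument.
client = BigtableClient(
    credentials=AnonymousCredentials(),
    transport="rest",
)
print(type(client.transport).__name__)  # expected: BigtableRestTransport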
@@ -503,7 +510,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -513,13 +520,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -614,7 +619,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -624,13 +629,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -714,7 +717,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -724,13 +727,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -813,7 +814,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -823,13 +824,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -912,7 +911,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -922,13 +921,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1015,7 +1012,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - 
use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1025,13 +1022,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1115,7 +1110,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1125,13 +1120,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1214,7 +1207,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1224,13 +1217,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1312,13 +1303,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 143678ae5..a39a0290e 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -2622,23 +2622,6 @@ def test_generate_initial_change_stream_partitions_field_headers(): "table_name=table_name_value", ) in kw["metadata"] - # Every method on the transport should just blindly - # raise NotImplementedError. 
- methods = ( - "read_rows", - "sample_row_keys", - "mutate_row", - "mutate_rows", - "check_and_mutate_row", - "ping_and_warm", - "read_modify_write_row", - "generate_initial_change_stream_partitions", - "read_change_stream", - ) - for method in methods: - with pytest.raises(NotImplementedError): - getattr(transport, method)(request=object()) - @pytest.mark.asyncio async def test_generate_initial_change_stream_partitions_field_headers_async(): @@ -3145,7 +3128,7 @@ def test_read_rows_rest_required_fields(request_type=bigtable.ReadRowsRequest): iter_content.return_value = iter(json_return_value) response = client.read_rows(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3431,7 +3414,7 @@ def test_sample_row_keys_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.sample_row_keys(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3703,7 +3686,7 @@ def test_mutate_row_rest_required_fields(request_type=bigtable.MutateRowRequest) response = client.mutate_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3998,7 +3981,7 @@ def test_mutate_rows_rest_required_fields(request_type=bigtable.MutateRowsReques iter_content.return_value = iter(json_return_value) response = client.mutate_rows(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4285,7 +4268,7 @@ def test_check_and_mutate_row_rest_required_fields( response = client.check_and_mutate_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4603,7 +4586,7 @@ def test_ping_and_warm_rest_required_fields(request_type=bigtable.PingAndWarmReq response = client.ping_and_warm(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4869,7 +4852,7 @@ def test_read_modify_write_row_rest_required_fields( response = client.read_modify_write_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5168,7 +5151,7 @@ def test_generate_initial_change_stream_partitions_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.generate_initial_change_stream_partitions(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5467,7 +5450,7 @@ def test_read_change_stream_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.read_change_stream(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5711,6 +5694,7 @@ def test_transport_get_channel(): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, + 
transports.PooledBigtableGrpcAsyncIOTransport, transports.BigtableRestTransport, ], ) @@ -5989,6 +5973,60 @@ def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): ) +@pytest.mark.parametrize( + "transport_class", [transports.PooledBigtableGrpcAsyncIOTransport] +) +def test_bigtable_pooled_grpc_transport_client_cert_source_for_mtls(transport_class): + cred = ga_credentials.AnonymousCredentials() + + # test with invalid pool size + with pytest.raises(ValueError): + transport_class( + host="squid.clam.whelk", + credentials=cred, + pool_size=0, + ) + + # Check ssl_channel_credentials is used if provided. + for pool_num in range(1, 5): + with mock.patch.object( + transport_class, "create_channel" + ) as mock_create_channel: + mock_ssl_channel_creds = mock.Mock() + transport_class( + host="squid.clam.whelk", + credentials=cred, + ssl_channel_credentials=mock_ssl_channel_creds, + pool_size=pool_num, + ) + mock_create_channel.assert_called_with( + "squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=None, + ssl_credentials=mock_ssl_channel_creds, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert mock_create_channel.call_count == pool_num + + # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls + # is used. + with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): + with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: + transport_class( + credentials=cred, + client_cert_source_for_mtls=client_cert_source_callback, + ) + expected_cert, expected_key = client_cert_source_callback() + mock_ssl_cred.assert_called_once_with( + certificate_chain=expected_cert, private_key=expected_key + ) + + def test_bigtable_http_transport_client_cert_source_for_mtls(): cred = ga_credentials.AnonymousCredentials() with mock.patch( From 69175938d1a60e8d52e28607a30eaf6fd0f87dfe Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 23 Mar 2023 17:10:21 -0700 Subject: [PATCH 095/349] improved close functionality --- gapic-generator-fork | 2 +- .../bigtable/transports/pooled_grpc_asyncio.py | 5 +++-- tests/unit/gapic/bigtable_v2/test_bigtable.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 78be45a70..6357f8f53 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 78be45a70d87092acdef9d32f12bfc79d2e46941 +Subproject commit 6357f8f53f571f73b321dd8e06b13894ba59648d diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 1a96750eb..832f6fe2b 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
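Note: patch 095 above ("improved close functionality") makes the pooled transport's close() gather every channel's close() coroutine instead of closing them one at a time; the hunk follows below. A minimal standalone sketch of that concurrent shutdown with asyncio.gather, using a throwaway pool of grpc.aio channels (the target address is arbitrary and never connected):

import asyncio

from grpc import aio


async def main() -> None:
    # Ad hoc pool built only for illustration.
    pool = [aio.insecure_channel("localhost:8086") for _ in range(3)]
    # aio.Channel.close() is a coroutine, so all closes can be awaited together.
    await asyncio.gather(*(channel.close() for channel in pool))


asyncio.run(main())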
# +import asyncio import warnings from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple, Union, List @@ -571,8 +572,8 @@ def read_change_stream( return self._stubs[stub_key] def close(self): - for channel in self._channel_pool: - channel.close() + close_fns = [channel.close() for channel in self.channel_pool] + return asyncio.gather(*close_fns) __all__ = ("PooledBigtableGrpcAsyncIOTransport",) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index a39a0290e..5e3d5b9c1 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -6437,6 +6437,22 @@ async def test_transport_close_async(): async with client: close.assert_not_called() close.assert_called_once() + close.assert_awaited() + + +@pytest.mark.asyncio +async def test_pooled_transport_close_async(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="pooled_grpc_asyncio", + ) + num_channels = len(client.transport.channel_pool) + with mock.patch.object(type(client.transport.channel_pool[0]), "close") as close: + async with client: + close.assert_not_called() + close.assert_called() + assert close.call_count == num_channels + close.assert_awaited() def test_transport_close(): From 7b5ecbba8d035d5002a0cbc9d2354c28138c7848 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 16:29:40 -0700 Subject: [PATCH 096/349] updated submodule --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 175 ++++---- tests/unit/gapic/bigtable_v2/test_bigtable.py | 379 ++++++++++++++++++ 3 files changed, 463 insertions(+), 93 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 6357f8f53..dba782b6b 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 6357f8f53f571f73b321dd8e06b13894ba59648d +Subproject commit dba782b6b9f12f25bf79b567012b78e40ca1ae1e diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 832f6fe2b..e91898435 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -245,6 +245,10 @@ async def replace_channel( new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. """ + if channel_idx >= len(self.channel_pool) or channel_idx < 0: + raise ValueError( + f"invalid channel_idx {channel_idx} for pool size {len(self.channel_pool)}" + ) if new_channel is None: new_channel = self.create_channel( self._host, @@ -262,16 +266,14 @@ async def replace_channel( self.channel_pool[channel_idx] = new_channel await old_channel.close(grace=grace) # invalidate stubs - for stub_channel, stub_func in self._stubs.keys(): + stub_keys = list(self._stubs.keys()) + for stub_channel, stub_func in stub_keys: if stub_channel == old_channel: del self._stubs[(stub_channel, stub_func)] return new_channel - @property - def read_rows( - self, - ) -> Callable[[bigtable.ReadRowsRequest], Awaitable[bigtable.ReadRowsResponse]]: - r"""Return a callable for the read rows method over gRPC. + def read_rows(self, *args, **kwargs) -> Awaitable[bigtable.ReadRowsResponse]: + r"""Function for calling the read rows method over gRPC. 
Streams back the contents of all requested rows in key order, optionally applying the same Reader filter to @@ -291,23 +293,22 @@ def read_rows( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "read_rows") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_stream( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_stream( "/google.bigtable.v2.Bigtable/ReadRows", request_serializer=bigtable.ReadRowsRequest.serialize, response_deserializer=bigtable.ReadRowsResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property def sample_row_keys( - self, - ) -> Callable[ - [bigtable.SampleRowKeysRequest], Awaitable[bigtable.SampleRowKeysResponse] - ]: - r"""Return a callable for the sample row keys method over gRPC. + self, *args, **kwargs + ) -> Awaitable[bigtable.SampleRowKeysResponse]: + r"""Function for calling the sample row keys method over gRPC. Returns a sample of row keys in the table. The returned row keys will delimit contiguous sections of @@ -326,21 +327,20 @@ def sample_row_keys( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "sample_row_keys") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_stream( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_stream( "/google.bigtable.v2.Bigtable/SampleRowKeys", request_serializer=bigtable.SampleRowKeysRequest.serialize, response_deserializer=bigtable.SampleRowKeysResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property - def mutate_row( - self, - ) -> Callable[[bigtable.MutateRowRequest], Awaitable[bigtable.MutateRowResponse]]: - r"""Return a callable for the mutate row method over gRPC. + def mutate_row(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowResponse]: + r"""Function for calling the mutate row method over gRPC. Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. @@ -356,21 +356,20 @@ def mutate_row( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "mutate_row") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_unary( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_unary( "/google.bigtable.v2.Bigtable/MutateRow", request_serializer=bigtable.MutateRowRequest.serialize, response_deserializer=bigtable.MutateRowResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property - def mutate_rows( - self, - ) -> Callable[[bigtable.MutateRowsRequest], Awaitable[bigtable.MutateRowsResponse]]: - r"""Return a callable for the mutate rows method over gRPC. 
+ def mutate_rows(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowsResponse]: + r"""Function for calling the mutate rows method over gRPC. Mutates multiple rows in a batch. Each individual row is mutated atomically as in MutateRow, but the entire @@ -387,24 +386,22 @@ def mutate_rows( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "mutate_rows") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_stream( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_stream( "/google.bigtable.v2.Bigtable/MutateRows", request_serializer=bigtable.MutateRowsRequest.serialize, response_deserializer=bigtable.MutateRowsResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property def check_and_mutate_row( - self, - ) -> Callable[ - [bigtable.CheckAndMutateRowRequest], - Awaitable[bigtable.CheckAndMutateRowResponse], - ]: - r"""Return a callable for the check and mutate row method over gRPC. + self, *args, **kwargs + ) -> Awaitable[bigtable.CheckAndMutateRowResponse]: + r"""Function for calling the check and mutate row method over gRPC. Mutates a row atomically based on the output of a predicate Reader filter. @@ -420,23 +417,20 @@ def check_and_mutate_row( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "check_and_mutate_row") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_unary( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_unary( "/google.bigtable.v2.Bigtable/CheckAndMutateRow", request_serializer=bigtable.CheckAndMutateRowRequest.serialize, response_deserializer=bigtable.CheckAndMutateRowResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property - def ping_and_warm( - self, - ) -> Callable[ - [bigtable.PingAndWarmRequest], Awaitable[bigtable.PingAndWarmResponse] - ]: - r"""Return a callable for the ping and warm method over gRPC. + def ping_and_warm(self, *args, **kwargs) -> Awaitable[bigtable.PingAndWarmResponse]: + r"""Function for calling the ping and warm method over gRPC. Warm up associated instance metadata for this connection. This call is not required but may be useful @@ -453,24 +447,22 @@ def ping_and_warm( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. 
next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "ping_and_warm") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_unary( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarm", request_serializer=bigtable.PingAndWarmRequest.serialize, response_deserializer=bigtable.PingAndWarmResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property def read_modify_write_row( - self, - ) -> Callable[ - [bigtable.ReadModifyWriteRowRequest], - Awaitable[bigtable.ReadModifyWriteRowResponse], - ]: - r"""Return a callable for the read modify write row method over gRPC. + self, *args, **kwargs + ) -> Awaitable[bigtable.ReadModifyWriteRowResponse]: + r"""Function for calling the read modify write row method over gRPC. Modifies a row atomically on the server. The method reads the latest existing timestamp and value from the @@ -491,24 +483,22 @@ def read_modify_write_row( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "read_modify_write_row") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_unary( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_unary( "/google.bigtable.v2.Bigtable/ReadModifyWriteRow", request_serializer=bigtable.ReadModifyWriteRowRequest.serialize, response_deserializer=bigtable.ReadModifyWriteRowResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property def generate_initial_change_stream_partitions( - self, - ) -> Callable[ - [bigtable.GenerateInitialChangeStreamPartitionsRequest], - Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], - ]: - r"""Return a callable for the generate initial change stream + self, *args, **kwargs + ) -> Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse]: + r"""Function for calling the generate initial change stream partitions method over gRPC. NOTE: This API is intended to be used by Apache Beam BigtableIO. @@ -527,23 +517,22 @@ def generate_initial_change_stream_partitions( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "generate_initial_change_stream_partitions") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_stream( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_stream( "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) - @property def read_change_stream( - self, - ) -> Callable[ - [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] - ]: - r"""Return a callable for the read change stream method over gRPC. 
+ self, *args, **kwargs + ) -> Awaitable[bigtable.ReadChangeStreamResponse]: + r"""Function for calling the read change stream method over gRPC. NOTE: This API is intended to be used by Apache Beam BigtableIO. Reads changes from a table's change stream. @@ -561,15 +550,17 @@ def read_change_stream( # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. next_channel = self.next_channel() - print(f"USING CHANNEL: {self._next_idx}") stub_key = (next_channel, "read_change_stream") - if stub_key not in self._stubs: - self._stubs[stub_key] = next_channel.unary_stream( + stub_func = self._stubs.get(stub_key, None) + if stub_func is None: + stub_func = next_channel.unary_stream( "/google.bigtable.v2.Bigtable/ReadChangeStream", request_serializer=bigtable.ReadChangeStreamRequest.serialize, response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, ) - return self._stubs[stub_key] + self._stubs[stub_key] = stub_func + # call stub + return stub_func(*args, **kwargs) def close(self): close_fns = [channel.close() for channel in self.channel_pool] diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 5e3d5b9c1..f6d514d65 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -743,6 +743,34 @@ def test_read_rows(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.ReadRowsResponse) +def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.read_rows(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "read_rows") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.read_rows(request) + assert next_channel.call_count == i + + def test_read_rows_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -962,6 +990,34 @@ def test_sample_row_keys(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.SampleRowKeysResponse) +def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.sample_row_keys(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "sample_row_keys") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.sample_row_keys(request) + assert next_channel.call_count == i + + def test_sample_row_keys_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -1180,6 +1236,34 @@ def test_mutate_row(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.MutateRowResponse) +def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.mutate_row(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "mutate_row") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.mutate_row(request) + assert next_channel.call_count == i + + def test_mutate_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -1443,6 +1527,34 @@ def test_mutate_rows(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.MutateRowsResponse) +def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.mutate_rows(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "mutate_rows") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.mutate_rows(request) + assert next_channel.call_count == i + + def test_mutate_rows_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. 
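
The pooled-rotation tests above all exercise the same behavior added to the transport earlier in this patch series: each RPC asks the pool for the next channel in round-robin order, then lazily creates and caches a stub keyed by (channel, method). A minimal standalone sketch of that pattern follows, for reference only; RoundRobinPool and make_stub are illustrative names and are not part of the library.

    class RoundRobinPool:
        """Illustrative sketch: round-robin channel rotation with per-channel stub caching."""

        def __init__(self, channels):
            self.channel_pool = list(channels)
            self._next_idx = 0
            self._stubs = {}  # keyed by (channel, method_name)

        def next_channel(self):
            channel = self.channel_pool[self._next_idx]
            # advance with wrap-around so successive calls spread evenly over the pool
            self._next_idx = (self._next_idx + 1) % len(self.channel_pool)
            return channel

        def call(self, method_name, make_stub, *args, **kwargs):
            channel = self.next_channel()
            stub_key = (channel, method_name)
            stub = self._stubs.get(stub_key)
            if stub is None:
                # make_stub builds the gRPC callable for this channel,
                # e.g. lambda ch: ch.unary_unary("/google.bigtable.v2.Bigtable/MutateRow", ...)
                stub = make_stub(channel)
                self._stubs[stub_key] = stub
            return stub(*args, **kwargs)

With a pool of size N, N consecutive calls visit each channel exactly once before wrapping around, and repeated calls on the same channel reuse the cached stub, which is the behavior the per-call next_channel and _stubs assertions in these tests rely on.
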
@@ -1676,6 +1788,34 @@ def test_check_and_mutate_row(request_type, transport: str = "grpc"): assert response.predicate_matched is True +def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.check_and_mutate_row(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "check_and_mutate_row") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.check_and_mutate_row(request) + assert next_channel.call_count == i + + def test_check_and_mutate_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2053,6 +2193,34 @@ def test_ping_and_warm(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.PingAndWarmResponse) +def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.ping_and_warm(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "ping_and_warm") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.ping_and_warm(request) + assert next_channel.call_count == i + + def test_ping_and_warm_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2273,6 +2441,34 @@ def test_read_modify_write_row(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.ReadModifyWriteRowResponse) +def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.read_modify_write_row(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "read_modify_write_row") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.read_modify_write_row(request) + assert next_channel.call_count == i + + def test_read_modify_write_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2532,6 +2728,36 @@ def test_generate_initial_change_stream_partitions( ) +def test_generate_initial_change_stream_partitions_pooled_rotation( + transport: str = "pooled_grpc_asyncio", +): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.generate_initial_change_stream_partitions(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "generate_initial_change_stream_partitions") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.generate_initial_change_stream_partitions(request) + assert next_channel.call_count == i + + def test_generate_initial_change_stream_partitions_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2791,6 +3017,34 @@ def test_read_change_stream(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.ReadChangeStreamResponse) +def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asyncio"): + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + channel = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel + + response = client.read_change_stream(request) + + # Establish that next_channel was called + next_channel.assert_called_once() + # Establish that stubs has been populated for the channel + stub_key = (channel, "read_change_stream") + assert client.transport._stubs[stub_key] is not None + # Establish that subsequent calls all call next_channel + starting_idx = client.transport._next_idx + for i in range(2, 10): + response = client.read_change_stream(request) + assert next_channel.call_count == i + + def test_read_change_stream_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -6519,3 +6773,128 @@ def test_api_key_credentials(client_class, transport_class): always_use_jwt_access=True, api_audience=None, ) + + +@pytest.mark.asyncio +async def test_pooled_transport_replace_default(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="pooled_grpc_asyncio", + ) + num_channels = len(client.transport.channel_pool) + for replace_idx in range(num_channels): + prev_pool = [channel for channel in client.transport.channel_pool] + grace_period = 4 + with mock.patch.object( + type(client.transport.channel_pool[0]), "close" + ) as close: + await client.transport.replace_channel(replace_idx, grace=grace_period) + close.assert_called_once() + close.assert_awaited() + close.assert_called_with(grace=grace_period) + assert isinstance(client.transport.channel_pool[replace_idx], grpc.aio.Channel) + # only the specified channel should be replaced + for i in range(num_channels): + if i == replace_idx: + assert client.transport.channel_pool[i] != prev_pool[i] + else: + assert client.transport.channel_pool[i] == prev_pool[i] + with pytest.raises(ValueError): + await client.transport.replace_channel(num_channels + 1) + with pytest.raises(ValueError): + await client.transport.replace_channel(-1) + + +@pytest.mark.asyncio +async def test_pooled_transport_replace_explicit(): + client = BigtableAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="pooled_grpc_asyncio", + ) + num_channels = len(client.transport.channel_pool) + for replace_idx in range(num_channels): + prev_pool = [channel for channel in client.transport.channel_pool] + grace_period = 0 + with mock.patch.object( + type(client.transport.channel_pool[0]), "close" + ) as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) + close.assert_called_once() + close.assert_awaited() + close.assert_called_with(grace=grace_period) + assert client.transport.channel_pool[replace_idx] == new_channel + # only the specified channel should be replaced + for i in range(num_channels): + if i == replace_idx: + assert client.transport.channel_pool[i] != prev_pool[i] + else: + assert client.transport.channel_pool[i] == prev_pool[i] + + +def test_pooled_transport_next_channel(): + num_channels = 10 + transport = transports.PooledBigtableGrpcAsyncIOTransport( + credentials=ga_credentials.AnonymousCredentials(), + pool_size=num_channels, + ) + assert len(transport.channel_pool) == num_channels + transport._next_idx = 0 + # rotate through all channels multiple times + num_cycles = 4 + for 
_ in range(num_cycles): + for i in range(num_channels - 1): + assert transport._next_idx == i + got_channel = transport.next_channel() + assert got_channel == transport.channel_pool[i] + assert transport._next_idx == (i + 1) + # test wrap around + assert transport._next_idx == num_channels - 1 + got_channel = transport.next_channel() + assert got_channel == transport.channel_pool[num_channels - 1] + assert transport._next_idx == 0 + + +def test_pooled_transport_pool_unique_channels(): + num_channels = 50 + + transport = transports.PooledBigtableGrpcAsyncIOTransport( + credentials=ga_credentials.AnonymousCredentials(), + pool_size=num_channels, + ) + channel_list = [channel for channel in transport.channel_pool] + channel_set = set(channel_list) + assert len(channel_list) == num_channels + assert len(channel_set) == num_channels + for channel in channel_list: + assert isinstance(channel, grpc.aio.Channel) + + +def test_pooled_transport_pool_creation(): + # channels should be created with the specified options + num_channels = 50 + creds = ga_credentials.AnonymousCredentials() + scopes = ["test1", "test2"] + quota_project_id = "test3" + host = "testhost:8080" + + with mock.patch.object( + transports.PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: + transport = transports.PooledBigtableGrpcAsyncIOTransport( + credentials=creds, + pool_size=num_channels, + scopes=scopes, + quota_project_id=quota_project_id, + host=host, + ) + assert create_channel.call_count == num_channels + for i in range(num_channels): + args = create_channel.call_args_list[i][0] + assert args[0] == host + kwargs = create_channel.call_args_list[i][1] + assert kwargs["credentials"] == creds + assert kwargs["scopes"] == scopes + assert kwargs["quota_project_id"] == quota_project_id From c0616dd87fa097c5a43c2ce0f99b4dd034b8c0db Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 16:41:34 -0700 Subject: [PATCH 097/349] reverted some generated changes --- .../services/bigtable/async_client.py | 41 +++--- .../bigtable_v2/services/bigtable/client.py | 18 ++- .../services/bigtable/transports/base.py | 26 ++-- .../services/bigtable/transports/rest.py | 126 +++++++++--------- 4 files changed, 116 insertions(+), 95 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index e5a0b13e2..1233e1288 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -283,7 +283,7 @@ def read_rows( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_rows, - default_timeout=None, + default_timeout=43200.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -378,7 +378,7 @@ def sample_row_keys( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.sample_row_keys, - default_timeout=None, + default_timeout=60.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -494,7 +494,17 @@ async def mutate_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.mutate_row, - default_timeout=None, + default_retry=retries.Retry( + initial=0.01, + maximum=60.0, + multiplier=2, + predicate=retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ), + deadline=60.0, + ), + default_timeout=60.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -604,7 +614,7 @@ def mutate_rows( # and friendly error handling. 
rpc = gapic_v1.method_async.wrap_method( self._client._transport.mutate_rows, - default_timeout=None, + default_timeout=600.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -758,7 +768,7 @@ async def check_and_mutate_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.check_and_mutate_row, - default_timeout=None, + default_timeout=20.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -797,8 +807,8 @@ async def ping_and_warm( Args: request (Optional[Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]]): - The request object. Request message for client - connection keep-alive and warming. + The request object. Request message for client connection + keep-alive and warming. name (:class:`str`): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -973,7 +983,7 @@ async def read_modify_write_row( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_modify_write_row, - default_timeout=None, + default_timeout=20.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -1017,8 +1027,9 @@ def generate_initial_change_stream_partitions( Args: request (Optional[Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (:class:`str`): Required. The unique name of the table from which to get @@ -1076,7 +1087,7 @@ def generate_initial_change_stream_partitions( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.generate_initial_change_stream_partitions, - default_timeout=None, + default_timeout=60.0, client_info=DEFAULT_CLIENT_INFO, ) @@ -1116,9 +1127,9 @@ def read_change_stream( Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for - Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.ReadChangeStream. table_name (:class:`str`): Required. The unique name of the table from which to read a change stream. Values are of the form @@ -1174,7 +1185,7 @@ def read_change_stream( # and friendly error handling. rpc = gapic_v1.method_async.wrap_method( self._client._transport.read_change_stream, - default_timeout=None, + default_timeout=43200.0, client_info=DEFAULT_CLIENT_INFO, ) diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index 3165f9160..db1175e27 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -382,9 +382,6 @@ def __init__( transport (Union[str, BigtableTransport]): The transport to use. If set to None, a transport is chosen automatically. - NOTE: "rest" transport functionality is currently in a - beta state (preview). We welcome your feedback via an - issue in this library's source repository. client_options (Optional[Union[google.api_core.client_options.ClientOptions, dict]]): Custom options for the client. It won't take effect if a ``transport`` instance is provided. 
(1) The ``api_endpoint`` property can be used to override the @@ -1096,8 +1093,8 @@ def ping_and_warm( Args: request (Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]): - The request object. Request message for client - connection keep-alive and warming. + The request object. Request message for client connection + keep-alive and warming. name (str): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -1332,8 +1329,9 @@ def generate_initial_change_stream_partitions( Args: request (Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (str): Required. The unique name of the table from which to get @@ -1435,9 +1433,9 @@ def read_change_stream( Args: request (Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for - Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.ReadChangeStream. table_name (str): Required. The unique name of the table from which to read a change stream. Values are of the form diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/base.py b/google/cloud/bigtable_v2/services/bigtable/transports/base.py index 5879a63cb..5b4580c18 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/base.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/base.py @@ -132,27 +132,37 @@ def _prep_wrapped_messages(self, client_info): self._wrapped_methods = { self.read_rows: gapic_v1.method.wrap_method( self.read_rows, - default_timeout=None, + default_timeout=43200.0, client_info=client_info, ), self.sample_row_keys: gapic_v1.method.wrap_method( self.sample_row_keys, - default_timeout=None, + default_timeout=60.0, client_info=client_info, ), self.mutate_row: gapic_v1.method.wrap_method( self.mutate_row, - default_timeout=None, + default_retry=retries.Retry( + initial=0.01, + maximum=60.0, + multiplier=2, + predicate=retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ), + deadline=60.0, + ), + default_timeout=60.0, client_info=client_info, ), self.mutate_rows: gapic_v1.method.wrap_method( self.mutate_rows, - default_timeout=None, + default_timeout=600.0, client_info=client_info, ), self.check_and_mutate_row: gapic_v1.method.wrap_method( self.check_and_mutate_row, - default_timeout=None, + default_timeout=20.0, client_info=client_info, ), self.ping_and_warm: gapic_v1.method.wrap_method( @@ -162,17 +172,17 @@ def _prep_wrapped_messages(self, client_info): ), self.read_modify_write_row: gapic_v1.method.wrap_method( self.read_modify_write_row, - default_timeout=None, + default_timeout=20.0, client_info=client_info, ), self.generate_initial_change_stream_partitions: gapic_v1.method.wrap_method( self.generate_initial_change_stream_partitions, - default_timeout=None, + default_timeout=60.0, client_info=client_info, ), self.read_change_stream: gapic_v1.method.wrap_method( self.read_change_stream, - default_timeout=None, + default_timeout=43200.0, client_info=client_info, ), } diff --git 
a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index 6c786f6b3..4343fbb90 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -365,9 +365,6 @@ class BigtableRestTransport(BigtableTransport): It sends JSON representations of protocol buffers over HTTP/1.1 - NOTE: This REST transport functionality is currently in a beta - state (preview). We welcome your feedback via an issue in this - library's source repository. Thank you! """ def __init__( @@ -387,39 +384,35 @@ def __init__( ) -> None: """Instantiate the transport. - NOTE: This REST transport functionality is currently in a beta - state (preview). We welcome your feedback via a GitHub issue in - this library's repository. Thank you! - - Args: - host (Optional[str]): - The hostname to connect to. - credentials (Optional[google.auth.credentials.Credentials]): The - authorization credentials to attach to requests. These - credentials identify the application to the service; if none - are specified, the client will attempt to ascertain the - credentials from the environment. - - credentials_file (Optional[str]): A file with credentials that can - be loaded with :func:`google.auth.load_credentials_from_file`. - This argument is ignored if ``channel`` is provided. - scopes (Optional(Sequence[str])): A list of scopes. This argument is - ignored if ``channel`` is provided. - client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client - certificate to configure mutual TLS HTTP channel. It is ignored - if ``channel`` is provided. - quota_project_id (Optional[str]): An optional project to use for billing - and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you are developing - your own client library. - always_use_jwt_access (Optional[bool]): Whether self signed JWT should - be used for service account credentials. - url_scheme: the protocol scheme for the API endpoint. Normally - "https", but for testing or local servers, - "http" can be specified. + Args: + host (Optional[str]): + The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client + certificate to configure mutual TLS HTTP channel. It is ignored + if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you are developing + your own client library. 
+ always_use_jwt_access (Optional[bool]): Whether self signed JWT should + be used for service account credentials. + url_scheme: the protocol scheme for the API endpoint. Normally + "https", but for testing or local servers, + "http" can be specified. """ # Run the base constructor # TODO(yon-mg): resolve other ctor params i.e. scopes, quota, etc. @@ -478,7 +471,6 @@ def __call__( request (~.bigtable.CheckAndMutateRowRequest): The request object. Request message for Bigtable.CheckAndMutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -510,7 +502,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -520,11 +512,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -580,7 +574,6 @@ def __call__( by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -619,7 +612,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -629,11 +622,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -687,7 +682,6 @@ def __call__( request (~.bigtable.MutateRowRequest): The request object. Request message for Bigtable.MutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -717,7 +711,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -727,11 +721,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -784,7 +780,6 @@ def __call__( request (~.bigtable.MutateRowsRequest): The request object. Request message for BigtableService.MutateRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -814,7 +809,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -824,11 +819,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -880,7 +877,6 @@ def __call__( request (~.bigtable.PingAndWarmRequest): The request object. Request message for client connection keep-alive and warming. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -911,7 +907,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -921,11 +917,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -979,7 +977,6 @@ def __call__( The request object. NOTE: This API is intended to be used by Apache Beam BigtableIO. Request message for Bigtable.ReadChangeStream. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1012,7 +1009,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1022,11 +1019,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1078,7 +1077,6 @@ def __call__( request (~.bigtable.ReadModifyWriteRowRequest): The request object. Request message for Bigtable.ReadModifyWriteRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
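
The edits repeated throughout this file make the same two changes to each REST method: request bodies and query parameters are serialized with use_integers_for_enums=True, and a "$alt" = "json;enum-encoding=int" system parameter is added, which appears to request the same integer enum encoding in responses. As a neutral illustration only (using a message type from the standard protobuf distribution rather than a Bigtable type), the snippet below shows what the client-side flag changes:

    from google.protobuf import descriptor_pb2, json_format

    # FieldDescriptorProto carries an enum-typed field, so it serves as a demo message.
    msg = descriptor_pb2.FieldDescriptorProto(
        name="demo", type=descriptor_pb2.FieldDescriptorProto.TYPE_STRING
    )

    # Default encoding: the enum is rendered by name, e.g. "type": "TYPE_STRING"
    print(json_format.MessageToJson(msg, use_integers_for_enums=False))

    # Integer encoding: the same enum is rendered by number, e.g. "type": 9
    print(json_format.MessageToJson(msg, use_integers_for_enums=True))
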
@@ -1110,7 +1108,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1120,11 +1118,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1177,7 +1177,6 @@ def __call__( request (~.bigtable.ReadRowsRequest): The request object. Request message for Bigtable.ReadRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1207,7 +1206,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1217,11 +1216,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1271,7 +1272,6 @@ def __call__( request (~.bigtable.SampleRowKeysRequest): The request object. Request message for Bigtable.SampleRowKeys. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1303,11 +1303,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" From 983d4c7860dfd15a21609d280b880b496fbae7f7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 16:49:04 -0700 Subject: [PATCH 098/349] reverted to protoc generation --- .../bigtable_v2/services/bigtable/client.py | 18 +-- .../services/bigtable/transports/rest.py | 126 +++++++++--------- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index db1175e27..3165f9160 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -382,6 +382,9 @@ def __init__( transport (Union[str, BigtableTransport]): The transport to use. If set to None, a transport is chosen automatically. + NOTE: "rest" transport functionality is currently in a + beta state (preview). We welcome your feedback via an + issue in this library's source repository. client_options (Optional[Union[google.api_core.client_options.ClientOptions, dict]]): Custom options for the client. It won't take effect if a ``transport`` instance is provided. 
(1) The ``api_endpoint`` property can be used to override the @@ -1093,8 +1096,8 @@ def ping_and_warm( Args: request (Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]): - The request object. Request message for client connection - keep-alive and warming. + The request object. Request message for client + connection keep-alive and warming. name (str): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -1329,9 +1332,8 @@ def generate_initial_change_stream_partitions( Args: request (Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]): - The request object. NOTE: This API is intended to be used - by Apache Beam BigtableIO. Request - message for + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (str): Required. The unique name of the table from which to get @@ -1433,9 +1435,9 @@ def read_change_stream( Args: request (Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]): - The request object. NOTE: This API is intended to be used - by Apache Beam BigtableIO. Request - message for Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.ReadChangeStream. table_name (str): Required. The unique name of the table from which to read a change stream. Values are of the form diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index 4343fbb90..6c786f6b3 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -365,6 +365,9 @@ class BigtableRestTransport(BigtableTransport): It sends JSON representations of protocol buffers over HTTP/1.1 + NOTE: This REST transport functionality is currently in a beta + state (preview). We welcome your feedback via an issue in this + library's source repository. Thank you! """ def __init__( @@ -384,35 +387,39 @@ def __init__( ) -> None: """Instantiate the transport. - Args: - host (Optional[str]): - The hostname to connect to. - credentials (Optional[google.auth.credentials.Credentials]): The - authorization credentials to attach to requests. These - credentials identify the application to the service; if none - are specified, the client will attempt to ascertain the - credentials from the environment. - - credentials_file (Optional[str]): A file with credentials that can - be loaded with :func:`google.auth.load_credentials_from_file`. - This argument is ignored if ``channel`` is provided. - scopes (Optional(Sequence[str])): A list of scopes. This argument is - ignored if ``channel`` is provided. - client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client - certificate to configure mutual TLS HTTP channel. It is ignored - if ``channel`` is provided. - quota_project_id (Optional[str]): An optional project to use for billing - and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you are developing - your own client library. - always_use_jwt_access (Optional[bool]): Whether self signed JWT should - be used for service account credentials. 
- url_scheme: the protocol scheme for the API endpoint. Normally - "https", but for testing or local servers, - "http" can be specified. + NOTE: This REST transport functionality is currently in a beta + state (preview). We welcome your feedback via a GitHub issue in + this library's repository. Thank you! + + Args: + host (Optional[str]): + The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client + certificate to configure mutual TLS HTTP channel. It is ignored + if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you are developing + your own client library. + always_use_jwt_access (Optional[bool]): Whether self signed JWT should + be used for service account credentials. + url_scheme: the protocol scheme for the API endpoint. Normally + "https", but for testing or local servers, + "http" can be specified. """ # Run the base constructor # TODO(yon-mg): resolve other ctor params i.e. scopes, quota, etc. @@ -471,6 +478,7 @@ def __call__( request (~.bigtable.CheckAndMutateRowRequest): The request object. Request message for Bigtable.CheckAndMutateRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -502,7 +510,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -512,13 +520,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -574,6 +580,7 @@ def __call__( by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -612,7 +619,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -622,13 +629,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -682,6 +687,7 @@ def __call__( request (~.bigtable.MutateRowRequest): The request object. Request message for Bigtable.MutateRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -711,7 +717,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -721,13 +727,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -780,6 +784,7 @@ def __call__( request (~.bigtable.MutateRowsRequest): The request object. Request message for BigtableService.MutateRows. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -809,7 +814,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -819,13 +824,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -877,6 +880,7 @@ def __call__( request (~.bigtable.PingAndWarmRequest): The request object. Request message for client connection keep-alive and warming. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -907,7 +911,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -917,13 +921,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -977,6 +979,7 @@ def __call__( The request object. NOTE: This API is intended to be used by Apache Beam BigtableIO. Request message for Bigtable.ReadChangeStream. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1009,7 +1012,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1019,13 +1022,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1077,6 +1078,7 @@ def __call__( request (~.bigtable.ReadModifyWriteRowRequest): The request object. Request message for Bigtable.ReadModifyWriteRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1108,7 +1110,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1118,13 +1120,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1177,6 +1177,7 @@ def __call__( request (~.bigtable.ReadRowsRequest): The request object. Request message for Bigtable.ReadRows. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -1206,7 +1207,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1216,13 +1217,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1272,6 +1271,7 @@ def __call__( request (~.bigtable.SampleRowKeysRequest): The request object. Request message for Bigtable.SampleRowKeys. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1303,13 +1303,11 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=True, + use_integers_for_enums=False, ) ) query_params.update(self._get_unset_required_fields(query_params)) - query_params["$alt"] = "json;enum-encoding=int" - # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" From c19658ab8aa9d34b341e07a7c6916c14a71023cf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 16:55:29 -0700 Subject: [PATCH 099/349] got tests passing --- gapic-generator-fork | 2 +- tests/unit/gapic/bigtable_v2/test_bigtable.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index dba782b6b..cdda28398 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit dba782b6b9f12f25bf79b567012b78e40ca1ae1e +Subproject commit cdda28398f0d8e6c9f85a750cb684d56f64c0cc9 diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index f6d514d65..5e227ff82 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -117,7 +117,7 @@ def test_bigtable_client_from_service_account_info(client_class, transport_name) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -178,7 +178,7 @@ def test_bigtable_client_from_service_account_file(client_class, transport_name) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -6311,7 +6311,7 @@ def test_bigtable_host_no_port(transport_name): ) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -6335,7 +6335,7 @@ def test_bigtable_host_with_port(transport_name): ) assert client.transport._host == ( "bigtable.googleapis.com:8000" - if transport_name in ["grpc", "grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] else "https://bigtable.googleapis.com:8000" ) From c2d0da0ae678ac4955be207443c9db1c799b7a29 Mon Sep 17 00:00:00 2001 From: Daniel 
Sanche Date: Fri, 24 Mar 2023 17:09:20 -0700 Subject: [PATCH 100/349] added new test file for client --- tests/unit/test_client.py | 70 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/unit/test_client.py diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py new file mode 100644 index 000000000..a05b00511 --- /dev/null +++ b/tests/unit/test_client.py @@ -0,0 +1,70 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest +import unittest + + +class TestBigtableDataClient(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor(self): + pass + + def test_channel_pool_creation(self): + pass + + def test_channel_pool_rotation(self): + pass + + def test_channel_pool_replace(self): + pass + + def test_start_background_channel_refresh(self): + pass + + def test__ping_and_warm_instances(self): + pass + + def test__manage_channel(self): + pass + + def test_register_instance(self): + pass + + def test_remove_instance_registration(self): + pass + + def test_get_table(self): + pass + + +class TestTable(unittest.TestCase): + + + def _make_one(self, *args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient().get_table(*args, **kwargs) + + def test_ctor(self): + pass From 8b54a304af12198f1b85a6eb437112c23e567b2f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 17:49:39 -0700 Subject: [PATCH 101/349] set up transport in client --- google/cloud/bigtable/client.py | 38 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 55a689b57..7009cc05e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -23,6 +23,7 @@ import warnings from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient +from google.cloud.bigtable_v2.services.bigtable.async_client import DEFAULT_CLIENT_INFO from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) @@ -31,6 +32,9 @@ import google.auth.credentials +import google.auth._default +from google.api_core import client_options as client_options_lib + if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -80,17 +84,39 @@ def __init__( ) if type(client_options) is dict: client_options = google.api_core.client_options.from_dict(client_options) - client_options = cast( - Optional["google.api_core.client_options.ClientOptions"], client_options + if client_options is None: + client_options = client_options_lib.ClientOptions() + client_options = cast(client_options_lib.ClientOptions, client_options) + + api_endpoint, client_cert_source_func = 
BigtableAsyncClient.get_mtls_endpoint_and_cert_source( + client_options ) + + api_key_value = getattr(client_options, "api_key", None) + if api_key_value and credentials: + raise ValueError( + "client_options.api_key and credentials are mutually exclusive" + ) + if api_key_value and hasattr(google.auth._default, "get_api_key_credentials"): + credentials = google.auth._default.get_api_key_credentials(api_key_value) + + self.transport = PooledBigtableGrpcAsyncIOTransport( + pool_size=pool_size, + credentials=credentials, + credentials_file=client_options.credentials_file, + host=api_endpoint, + scopes=client_options.scopes, + client_cert_source_for_mtls=client_cert_source_func, + quota_project_id=client_options.quota_project_id, + client_info=DEFAULT_CLIENT_INFO, + always_use_jwt_access=True, + api_audience=client_options.api_audience, + ) self._gapic_client = BigtableAsyncClient( credentials=credentials, - transport="pooled_grpc_asyncio", + transport=self.transport, client_options=client_options, ) - self.transport: PooledBigtableGrpcAsyncIOTransport = cast( - PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport - ) # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() # attempt to start background tasks From 0dd981b258c6240a87560c8539520b97e9e0a29d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 17:50:11 -0700 Subject: [PATCH 102/349] implemented tests for underlying transport --- tests/unit/test_client.py | 55 +++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a05b00511..bb3ffd176 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -15,6 +15,13 @@ import pytest import unittest +import grpc +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # pragma: NO COVER +except ImportError: # pragma: NO COVER + import mock class TestBigtableDataClient(unittest.TestCase): @@ -31,13 +38,51 @@ def test_ctor(self): pass def test_channel_pool_creation(self): - pass + pool_size = 14 + with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + client = self._make_one(project="project-id", pool_size=pool_size) + self.assertEqual(create_channel.call_count, pool_size) + # channels should be unique + client = self._make_one(project="project-id", pool_size=pool_size) + pool_list = list(client.transport.channel_pool) + pool_set = set(client.transport.channel_pool) + self.assertEqual(len(pool_list), len(pool_set)) - def test_channel_pool_rotation(self): - pass - def test_channel_pool_replace(self): - pass + def test_channel_pool_rotation(self): + pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + self.assertEqual(len(client.transport.channel_pool), pool_size) + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + with mock.patch.object(type(client.transport.channel_pool[0]), "unary_unary") as unary_unary: + # calling an rpc `pool_size` times should use a different channel each time + for i in range(pool_size): + channel_1 = client.transport.channel_pool[client.transport._next_idx] + next_channel.return_value = channel_1 + client.transport.ping_and_warm() + self.assertEqual(next_channel.call_count, i + 1) + channel_1.unary_unary.assert_called_once() + + + @pytest.mark.asyncio + async def test_channel_pool_replace(self): + 
pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + for replace_idx in range(pool_size): + start_pool = [channel for channel in client.transport.channel_pool] + grace_period = 9 + with mock.patch.object(type(client.transport.channel_pool[0]), "close") as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.channel_pool.replace_channel(replace_idx, grace=grace_period, new_channel=new_channel) + close.assert_called_once_with(grace=grace_period) + close.assert_awaited_once() + self.assertEqual(client.transport.channel_pool[replace_idx], new_channel) + for i in range(pool_size): + if i != replace_idx: + self.assertEqual(client.transport.channel_pool[i], start_pool[i]) + else: + self.assertNotEqual(client.transport.channel_pool[i], start_pool[i]) def test_start_background_channel_refresh(self): pass From b0ecd3c08471df7b006acbbff66b2fa1aeb413b0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 24 Mar 2023 22:16:49 -0700 Subject: [PATCH 103/349] added some tests --- google/cloud/bigtable/client.py | 36 ++++++++++----- tests/unit/test_client.py | 80 +++++++++++++++++++++++++++++---- 2 files changed, 96 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 7009cc05e..43c14fb9c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -221,23 +221,35 @@ async def register_instance(self, instance_id: str): Channels will not be refreshed unless at least one instance is registered """ instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._active_instances.add(instance_name) - if self._channel_refresh_tasks: - # refresh tasks already running - # call ping and warm on all existing channels - for channel in self.transport.channel_pool: - await self._ping_and_warm_instances(channel) - else: - # refresh tasks aren't active. start them as background tasks - self.start_background_channel_refresh() - - async def remove_instance_registration(self, instance_id: str): + if instance_name not in self._active_instances: + self._active_instances.add(instance_name) + if self._channel_refresh_tasks: + # refresh tasks already running + # call ping and warm on all existing channels + for channel in self.transport.channel_pool: + await self._ping_and_warm_instances(channel) + else: + # refresh tasks aren't active. 
start them as background tasks + self.start_background_channel_refresh() + + async def remove_instance_registration(self, instance_id: str) -> bool: """ Removes an instance from the client's registered instances, to prevent warming new channels for the instance + + If instance_id is not registered, returns False + + Args: + instance_id: id of the instance to remove + Returns: + - True if instance was removed """ instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._active_instances.remove(instance_name) + try: + self._active_instances.remove(instance_name) + return True + except KeyError: + return False async def get_table( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bb3ffd176..defd61df2 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -16,6 +16,7 @@ import pytest import unittest import grpc +import asyncio # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -24,7 +25,7 @@ import mock -class TestBigtableDataClient(unittest.TestCase): +class TestBigtableDataClient(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable.client import BigtableDataClient @@ -74,7 +75,7 @@ async def test_channel_pool_replace(self): grace_period = 9 with mock.patch.object(type(client.transport.channel_pool[0]), "close") as close: new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.channel_pool.replace_channel(replace_idx, grace=grace_period, new_channel=new_channel) + await client.transport.replace_channel(replace_idx, grace=grace_period, new_channel=new_channel) close.assert_called_once_with(grace=grace_period) close.assert_awaited_once() self.assertEqual(client.transport.channel_pool[replace_idx], new_channel) @@ -93,14 +94,77 @@ def test__ping_and_warm_instances(self): def test__manage_channel(self): pass - def test_register_instance(self): - pass + @pytest.mark.asyncio + async def test_register_instance(self): + # create the client without calling start_background_channel_refresh + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = self._make_one(project="project-id") + self.assertFalse(client._channel_refresh_tasks) + # first call should start background refresh + self.assertEqual(client._active_instances, set()) + await client.register_instance("instance-1") + self.assertEqual(len(client._active_instances), 1) + self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-1"}) + self.assertTrue(client._channel_refresh_tasks) + # next call should not + with mock.patch.object(type(self._make_one()), "start_background_channel_refresh") as refresh_mock: + await client.register_instance("instance-2") + self.assertEqual(len(client._active_instances), 2) + self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-1", "projects/project-id/instances/instance-2"}) + refresh_mock.assert_not_called() - def test_remove_instance_registration(self): - pass + @pytest.mark.asyncio + async def test_register_instance_ping_and_warm(self): + # should ping and warm each new instance + pool_size = 7 + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = self._make_one(project="project-id", pool_size=pool_size) + # first call should start background refresh + 
self.assertFalse(client._channel_refresh_tasks) + await client.register_instance("instance-1") + self.assertEqual(len(client._channel_refresh_tasks), pool_size) + # next calls should trigger ping and warm + with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_mock: + await client.register_instance("instance-2") + self.assertEqual(ping_mock.call_count, pool_size) + await client.register_instance("instance-3") + self.assertEqual(ping_mock.call_count, pool_size * 2) + # duplcate instances should not trigger ping and warm + await client.register_instance("instance-3") + self.assertEqual(ping_mock.call_count, pool_size * 2) - def test_get_table(self): - pass + @pytest.mark.asyncio + async def test_remove_instance_registration(self): + client = self._make_one(project="project-id") + await client.register_instance("instance-1") + await client.register_instance("instance-2") + self.assertEqual(len(client._active_instances), 2) + success = await client.remove_instance_registration("instance-1") + self.assertTrue(success) + self.assertEqual(len(client._active_instances), 1) + self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-2"}) + success = await client.remove_instance_registration("nonexistant") + self.assertFalse(success) + self.assertEqual(len(client._active_instances), 1) + + @pytest.mark.asyncio + async def test_get_table(self): + from google.cloud.bigtable.client import Table + client = self._make_one(project="project-id") + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + with mock.patch.object(type(self._make_one()), "register_instance") as register_instance: + table = await client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id) + register_instance.assert_called_once_with(expected_instance_id) + register_instance.assert_awaited_once() + self.assertIsInstance(table, Table) + self.assertEqual(table.table_id, expected_table_id) + self.assertEqual(table.instance, expected_instance_id) + self.assertEqual(table.app_profile_id, expected_app_profile_id) + self.assertIs(table.client, client) class TestTable(unittest.TestCase): From b8a6218cc5a22826397ea6c2e808862cf3a22502 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 27 Mar 2023 11:43:31 -0700 Subject: [PATCH 104/349] update error text Co-authored-by: Mattie Fu --- google/cloud/bigtable/read_rows_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index fb7a4174b..f92e588bf 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -142,7 +142,7 @@ def add_range( if start_is_inclusive is None: start_is_inclusive = True elif start_key is None: - raise ValueError("start_is_inclusive must not be set without start_key") + raise ValueError("start_is_inclusive must be set with start_key") if end_is_inclusive is None: end_is_inclusive = False elif end_key is None: From fd1038da6343267075dd980f8254f7aa4f31cec4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 27 Mar 2023 11:44:09 -0700 Subject: [PATCH 105/349] update error text Co-authored-by: Mattie Fu --- google/cloud/bigtable/read_rows_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index f92e588bf..f16de7b14 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ 
b/google/cloud/bigtable/read_rows_query.py @@ -146,7 +146,7 @@ def add_range( if end_is_inclusive is None: end_is_inclusive = False elif end_key is None: - raise ValueError("end_is_inclusive must not be set without end_key") + raise ValueError("end_is_inclusive must be set with end_key") # ensure that start_key and end_key are bytes if isinstance(start_key, str): start_key = start_key.encode() From 2f316ce864b6d2406bca8195a752a7391fd71d12 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 27 Mar 2023 12:56:12 -0700 Subject: [PATCH 106/349] fixed docstring Co-authored-by: Mattie Fu --- google/cloud/bigtable/row_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index f8b30f833..d08f3caea 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -117,7 +117,7 @@ def _get_all_from_family( self, family: family_id ) -> Generator[CellResponse, None, None]: """ - Returns all cells in the row + Returns all cells in the row for the family_id """ if family not in self._cells_map: raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") From e47551fa93914f66704df8155eaeafa6b3c127d7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 27 Mar 2023 18:21:32 -0700 Subject: [PATCH 107/349] added manage channels tests --- google/cloud/bigtable/client.py | 5 +- tests/unit/test_client.py | 133 +++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 43c14fb9c..f303c2a3b 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -184,7 +184,7 @@ async def _manage_channel( requests before closing, in seconds """ first_refresh = self._channel_init_time + refresh_interval - next_sleep = first_refresh - time.time() + next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately channel = self.transport.channel_pool[channel_idx] @@ -196,7 +196,6 @@ async def _manage_channel( new_channel = self.transport.create_channel( self.transport._host, credentials=self.transport._credentials, - credentials_file=None, scopes=self.transport._scopes, ssl_credentials=self.transport._ssl_channel_credentials, quota_project_id=self.transport._quota_project_id, @@ -205,7 +204,7 @@ async def _manage_channel( ("grpc.max_receive_message_length", -1), ], ) - await self._ping_and_warm_instances(channel) + await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() await self.transport.replace_channel(channel_idx, grace_period, new_channel) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index defd61df2..4d46c4302 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -91,8 +91,125 @@ def test_start_background_channel_refresh(self): def test__ping_and_warm_instances(self): pass - def test__manage_channel(self): - pass + @pytest.mark.asyncio + async def test__manage_channel_first_sleep(self): + # first sleep time should be `refresh_interval` seconds after client init + import time + from collections import namedtuple + params = namedtuple('params', ['refresh_interval', 'wait_time', 'expected_sleep']) + test_params = [ + params(refresh_interval=0, wait_time=0, expected_sleep=0), + params(refresh_interval=0, wait_time=1, expected_sleep=0), + params(refresh_interval=10, 
wait_time=0, expected_sleep=10), + params(refresh_interval=10, wait_time=5, expected_sleep=5), + params(refresh_interval=10, wait_time=10, expected_sleep=0), + params(refresh_interval=10, wait_time=15, expected_sleep=0), + ] + with mock.patch.object(time, "time") as time: + time.return_value = 0 + for refresh_interval, wait_time, expected_sleep in test_params: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = asyncio.CancelledError + try: + client = self._make_one(project="project-id") + client._channel_init_time = -wait_time + await client._manage_channel(0, refresh_interval) + except asyncio.CancelledError: + pass + sleep.assert_called_once() + call_time = sleep.call_args[0][0] + self.assertAlmostEqual(call_time, expected_sleep, delta=0.1, + msg=f"params={params}") + + @pytest.mark.asyncio + async def test__manage_channel_ping_and_warm(self): + # should ping an warm all new channels, and old channels if sleeping + client = self._make_one(project="project-id") + new_channel = grpc.aio.insecure_channel("localhost:8080") + with mock.patch.object(asyncio, "sleep") as sleep: + with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + create_channel.return_value = new_channel + with mock.patch.object(type(self._make_one().transport), "replace_channel") as replace_channel: + replace_channel.side_effect = asyncio.CancelledError + # should ping and warm old channel then new if sleep > 0 + with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_and_warm: + try: + channel_idx = 2 + old_channel = client.transport.channel_pool[channel_idx] + await client._manage_channel(channel_idx, 10) + except asyncio.CancelledError: + pass + self.assertEqual(ping_and_warm.call_count, 2) + self.assertNotEqual(old_channel, new_channel) + called_with = [call[0][0] for call in ping_and_warm.call_args_list] + self.assertIn(old_channel, called_with) + self.assertIn(new_channel, called_with) + # should ping and warm instantly new channel only if not sleeping + with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_and_warm: + try: + await client._manage_channel(0, 0) + except asyncio.CancelledError: + pass + ping_and_warm.assert_called_once_with(new_channel) + + @pytest.mark.asyncio + async def test__manage_channel_sleeps(self): + # make sure that sleeps work as expected + from collections import namedtuple + import time + params = namedtuple('params', ['refresh_interval', 'num_cycles', 'expected_sleep']) + test_params = [ + params(refresh_interval=None, num_cycles=1, expected_sleep=60*45), + params(refresh_interval=10, num_cycles=10, expected_sleep=100), + params(refresh_interval=10, num_cycles=1, expected_sleep=10), + ] + channel_idx = 1 + with mock.patch.object(time, "time") as time: + time.return_value = 0 + for refresh_interval, num_cycles, expected_sleep in test_params: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles-1)] + [asyncio.CancelledError] + try: + client = self._make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel(channel_idx, refresh_interval) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + self.assertEqual(sleep.call_count, num_cycles) + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + self.assertAlmostEqual(total_sleep, expected_sleep, delta=0.1, + msg=f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, 
expected_sleep={expected_sleep}") + + @pytest.mark.asyncio + async def test__manage_channel_refresh(self): + # make sure that channels are properly refreshed + from collections import namedtuple + import time + expected_grace = 9 + expected_refresh = 0.5 + channel_idx = 1 + new_channel = grpc.aio.insecure_channel("localhost:8080") + + for num_cycles in [0, 1, 10, 100]: + with mock.patch.object(type(self._make_one().transport), "replace_channel") as replace_channel: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles)] + [asyncio.CancelledError] + client = self._make_one(project="project-id") + with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + create_channel.return_value = new_channel + try: + await client._manage_channel(channel_idx, refresh_interval=expected_refresh, grace_period=expected_grace) + except asyncio.CancelledError: + pass + self.assertEqual(sleep.call_count, num_cycles+1) + self.assertEqual(create_channel.call_count, num_cycles) + self.assertEqual(replace_channel.call_count, num_cycles) + for call in replace_channel.call_args_list: + self.assertEqual(call[0][0], channel_idx) + self.assertEqual(call[0][1], expected_grace) + self.assertEqual(call[0][2], new_channel) @pytest.mark.asyncio async def test_register_instance(self): @@ -157,15 +274,23 @@ async def test_get_table(self): expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" with mock.patch.object(type(self._make_one()), "register_instance") as register_instance: - table = await client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id) + table = client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id) register_instance.assert_called_once_with(expected_instance_id) - register_instance.assert_awaited_once() self.assertIsInstance(table, Table) self.assertEqual(table.table_id, expected_table_id) self.assertEqual(table.instance, expected_instance_id) self.assertEqual(table.app_profile_id, expected_app_profile_id) self.assertIs(table.client, client) + def test_get_table_no_loop(self): + client = self._make_one(project="project-id") + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client.get_table("instance-id", "table-id") + with self.assertWarns(Warning) as cm: + client.get_table("instance-id", "table-id") + self.assertIn("Table should be created in an asyncio event loop", str(cm.warning)) + class TestTable(unittest.TestCase): From 96d526b066f52425ee99c79c52cc1324f08141ff Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 10:52:41 -0700 Subject: [PATCH 108/349] added more tests --- tests/unit/test_client.py | 59 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4d46c4302..306c67a0c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -85,11 +85,62 @@ async def test_channel_pool_replace(self): else: self.assertNotEqual(client.transport.channel_pool[i], start_pool[i]) - def test_start_background_channel_refresh(self): - pass + def test_start_background_channel_refresh_sync(self): + # should raise RuntimeError if called in a sync context + client = self._make_one(project="project-id") + with self.assertRaises(RuntimeError): + client.start_background_channel_refresh() - def test__ping_and_warm_instances(self): - pass + 
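The _manage_channel tests above pin down the scheduling rule introduced in this commit: the first refresh should fire refresh_interval seconds after client construction, and never in the past. A minimal standalone sketch of that computation follows; the helper name first_sleep_seconds is illustrative and not part of the client.

    import time

    def first_sleep_seconds(channel_init_time: float, refresh_interval: float) -> float:
        # first refresh is scheduled refresh_interval seconds after init,
        # clamped to zero if that moment has already passed
        first_refresh = channel_init_time + refresh_interval
        return max(first_refresh - time.time(), 0)

    # e.g. a client initialized 5s ago with a 10s interval sleeps ~5s;
    # one initialized 15s ago refreshes immediately (sleep of 0).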
@pytest.mark.asyncio + async def test_start_background_channel_refresh_tasks_exist(self): + # if tasks exist, should do nothing + client = self._make_one(project="project-id") + client._channel_refresh_tasks = [object()] + with mock.patch.object(asyncio, "create_task") as create_task: + client.start_background_channel_refresh() + create_task.assert_not_called() + + @pytest.mark.asyncio + async def test_start_background_channel_refresh(self): + # should create background tasks for each channel + for pool_size in [1, 3, 7]: + client = self._make_one(project="project-id", pool_size=pool_size) + ping_and_warm = AsyncMock() + client._ping_and_warm_instances = ping_and_warm + client.start_background_channel_refresh() + self.assertEqual(len(client._channel_refresh_tasks), pool_size) + for task in client._channel_refresh_tasks: + self.assertIsInstance(task, asyncio.Task) + await asyncio.gather(*client._channel_refresh_tasks) + await asyncio.sleep(0.1) + self.assertEqual(ping_and_warm.call_count, pool_size) + for channel in client.transport.channel_pool: + ping_and_warm.assert_any_call(channel) + + @pytest.mark.asyncio + async def test__ping_and_warm_instances(self): + # test with no instances + gather = AsyncMock() + asyncio.gather = gather + client = self._make_one(project="project-id", pool_size=1) + channel = client.transport.channel_pool[0] + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + self.assertFalse(gather.call_args.args) + self.assertEqual(gather.call_args.kwargs, {"return_exceptions": True}) + # test with instances + client._active_instances = ["instance-1", "instance-2", "instance-3", "instance-4"] + gather = AsyncMock() + asyncio.gather = gather + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + self.assertEqual(len(gather.call_args.args), 4) + self.assertEqual(gather.call_args.kwargs, {"return_exceptions": True}) + for idx, call in enumerate(gather.call_args.args): + self.assertIsInstance(call, grpc.aio.UnaryUnaryCall) + call._request["name"] = client._active_instances[idx] @pytest.mark.asyncio async def test__manage_channel_first_sleep(self): From e997892554e713869fb5d98bce2dd8c5ef019e5f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 11:48:45 -0700 Subject: [PATCH 109/349] client needs active event loop; reorganized tests around that --- google/cloud/bigtable/client.py | 44 +++++------- tests/unit/test_client.py | 114 ++++++++++++-------------------- 2 files changed, 57 insertions(+), 101 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index f303c2a3b..727fc8460 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -61,6 +61,8 @@ def __init__( """ Create a client instance for the Bigtable Data API + Client must be created within an async run loop context + Args: project: the project which the client acts on behalf of. If not passed, falls back to the default inferred @@ -77,6 +79,13 @@ def __init__( on the client. API Endpoint should be set through client_options. 
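The test__ping_and_warm_instances test above only asserts how asyncio.gather is invoked. As a rough standalone sketch of the pattern, one warm-up call is issued per registered instance and gathered with return_exceptions=True so a single failure does not abort the rest; ping_instance below is a stand-in for the real PingAndWarm RPC and all names are illustrative.

    import asyncio

    async def ping_instance(channel, instance_name):
        # stand-in for channel.unary_unary(...)(request={"name": instance_name})
        await asyncio.sleep(0)
        return instance_name

    async def ping_and_warm_all(channel, active_instances):
        # one coroutine per registered instance; failures are returned inline
        tasks = [ping_instance(channel, name) for name in active_instances]
        return await asyncio.gather(*tasks, return_exceptions=True)

    asyncio.run(ping_and_warm_all(None, {"projects/p/instances/i1", "projects/p/instances/i2"}))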
metadata: a list of metadata headers to be attached to all calls with this client """ + # check for active run loop + try: + asyncio.get_running_loop() + except RuntimeError as e: + raise RuntimeError( + f"{self.__class__.__name__} must be created within an async context" + ) from e super(BigtableDataClient, self).__init__( project=project, credentials=credentials, @@ -117,32 +126,16 @@ def __init__( transport=self.transport, client_options=client_options, ) + self.metadata = metadata # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] - try: - self.start_background_channel_refresh() - except RuntimeError: - warnings.warn( - "BigtableDataClient should be started in an " - "asyncio event loop. Channel refresh will not be started" - ) + for channel_idx in range(len(self.transport.channel_pool)): + refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) + self._channel_refresh_tasks.append(refresh_task) - def start_background_channel_refresh(self) -> None: - """ - Starts a background task to ping and warm each channel in the pool - - Raises: - - RuntimeError if not called in an asyncio event loop - """ - if not self._channel_refresh_tasks: - # raise RuntimeError if there is no event loop - asyncio.get_running_loop() - for channel_idx in range(len(self.transport.channel_pool)): - refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) - self._channel_refresh_tasks.append(refresh_task) async def _ping_and_warm_instances( self, channel: grpc.aio.Channel @@ -222,14 +215,9 @@ async def register_instance(self, instance_id: str): instance_name = self._gapic_client.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) - if self._channel_refresh_tasks: - # refresh tasks already running - # call ping and warm on all existing channels - for channel in self.transport.channel_pool: - await self._ping_and_warm_instances(channel) - else: - # refresh tasks aren't active. 
start them as background tasks - self.start_background_channel_refresh() + # call ping and warm on all existing channels + for channel in self.transport.channel_pool: + await self._ping_and_warm_instances(channel) async def remove_instance_registration(self, instance_id: str) -> bool: """ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 306c67a0c..fee10da6f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -24,8 +24,7 @@ except ImportError: # pragma: NO COVER import mock - -class TestBigtableDataClient(unittest.IsolatedAsyncioTestCase): +class TestBigtableDataClientAsync(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable.client import BigtableDataClient @@ -35,10 +34,23 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) - def test_ctor(self): - pass - - def test_channel_pool_creation(self): + @pytest.mark.asyncio + async def test_ctor(self): + expected_project = "project-id" + expected_pool_size = 11 + expected_metadata = [("a", "b")] + client = self._make_one( + project="project-id", pool_size=expected_pool_size, metadata=expected_metadata, + ) + await asyncio.sleep(0.1) + self.assertEqual(client.project, expected_project) + self.assertEqual(len(client.transport.channel_pool), expected_pool_size) + self.assertEqual(client.metadata, expected_metadata) + self.assertFalse(client._active_instances) + self.assertEqual(len(client._channel_refresh_tasks), expected_pool_size) + + + async def test_channel_pool_creation(self): pool_size = 14 with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: client = self._make_one(project="project-id", pool_size=pool_size) @@ -49,8 +61,7 @@ def test_channel_pool_creation(self): pool_set = set(client.transport.channel_pool) self.assertEqual(len(pool_list), len(pool_set)) - - def test_channel_pool_rotation(self): + async def test_channel_pool_rotation(self): pool_size = 7 client = self._make_one(project="project-id", pool_size=pool_size) self.assertEqual(len(client.transport.channel_pool), pool_size) @@ -66,7 +77,6 @@ def test_channel_pool_rotation(self): channel_1.unary_unary.assert_called_once() - @pytest.mark.asyncio async def test_channel_pool_replace(self): pool_size = 7 client = self._make_one(project="project-id", pool_size=pool_size) @@ -85,39 +95,20 @@ async def test_channel_pool_replace(self): else: self.assertNotEqual(client.transport.channel_pool[i], start_pool[i]) - def test_start_background_channel_refresh_sync(self): - # should raise RuntimeError if called in a sync context - client = self._make_one(project="project-id") - with self.assertRaises(RuntimeError): - client.start_background_channel_refresh() - - @pytest.mark.asyncio - async def test_start_background_channel_refresh_tasks_exist(self): - # if tasks exist, should do nothing - client = self._make_one(project="project-id") - client._channel_refresh_tasks = [object()] - with mock.patch.object(asyncio, "create_task") as create_task: - client.start_background_channel_refresh() - create_task.assert_not_called() - - @pytest.mark.asyncio - async def test_start_background_channel_refresh(self): + async def test_ctor_background_channel_refresh(self): # should create background tasks for each channel for pool_size in [1, 3, 7]: client = self._make_one(project="project-id", pool_size=pool_size) ping_and_warm = AsyncMock() client._ping_and_warm_instances = ping_and_warm - 
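The register_instance and remove_instance_registration logic in these commits boils down to a set-based registry: registration is idempotent (duplicates skip the ping-and-warm step) and removal reports whether anything was actually removed. A simplified synchronous sketch, with illustrative names and the warm-up call left as a comment:

    class InstanceRegistry:
        def __init__(self, project):
            self.project = project
            self._active = set()  # fully-qualified instance names

        def register(self, instance_id):
            name = f"projects/{self.project}/instances/{instance_id}"
            if name in self._active:
                return  # duplicate: no new warm-up needed
            self._active.add(name)
            # the client would ping-and-warm every channel in the pool here

        def remove(self, instance_id):
            name = f"projects/{self.project}/instances/{instance_id}"
            try:
                self._active.remove(name)
                return True
            except KeyError:
                return False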
client.start_background_channel_refresh() self.assertEqual(len(client._channel_refresh_tasks), pool_size) for task in client._channel_refresh_tasks: self.assertIsInstance(task, asyncio.Task) - await asyncio.gather(*client._channel_refresh_tasks) await asyncio.sleep(0.1) self.assertEqual(ping_and_warm.call_count, pool_size) for channel in client.transport.channel_pool: ping_and_warm.assert_any_call(channel) - @pytest.mark.asyncio async def test__ping_and_warm_instances(self): # test with no instances gather = AsyncMock() @@ -142,7 +133,6 @@ async def test__ping_and_warm_instances(self): self.assertIsInstance(call, grpc.aio.UnaryUnaryCall) call._request["name"] = client._active_instances[idx] - @pytest.mark.asyncio async def test__manage_channel_first_sleep(self): # first sleep time should be `refresh_interval` seconds after client init import time @@ -172,7 +162,6 @@ async def test__manage_channel_first_sleep(self): self.assertAlmostEqual(call_time, expected_sleep, delta=0.1, msg=f"params={params}") - @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(self): # should ping an warm all new channels, and old channels if sleeping client = self._make_one(project="project-id") @@ -203,7 +192,6 @@ async def test__manage_channel_ping_and_warm(self): pass ping_and_warm.assert_called_once_with(new_channel) - @pytest.mark.asyncio async def test__manage_channel_sleeps(self): # make sure that sleeps work as expected from collections import namedtuple @@ -233,7 +221,6 @@ async def test__manage_channel_sleeps(self): self.assertAlmostEqual(total_sleep, expected_sleep, delta=0.1, msg=f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}") - @pytest.mark.asyncio async def test__manage_channel_refresh(self): # make sure that channels are properly refreshed from collections import namedtuple @@ -262,48 +249,23 @@ async def test__manage_channel_refresh(self): self.assertEqual(call[0][1], expected_grace) self.assertEqual(call[0][2], new_channel) - @pytest.mark.asyncio - async def test_register_instance(self): - # create the client without calling start_background_channel_refresh - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = self._make_one(project="project-id") - self.assertFalse(client._channel_refresh_tasks) - # first call should start background refresh - self.assertEqual(client._active_instances, set()) - await client.register_instance("instance-1") - self.assertEqual(len(client._active_instances), 1) - self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-1"}) - self.assertTrue(client._channel_refresh_tasks) - # next call should not - with mock.patch.object(type(self._make_one()), "start_background_channel_refresh") as refresh_mock: - await client.register_instance("instance-2") - self.assertEqual(len(client._active_instances), 2) - self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-1", "projects/project-id/instances/instance-2"}) - refresh_mock.assert_not_called() - - @pytest.mark.asyncio async def test_register_instance_ping_and_warm(self): # should ping and warm each new instance pool_size = 7 - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = self._make_one(project="project-id", pool_size=pool_size) - # first call should start background refresh - self.assertFalse(client._channel_refresh_tasks) - 
await client.register_instance("instance-1") + client = self._make_one(project="project-id", pool_size=pool_size) self.assertEqual(len(client._channel_refresh_tasks), pool_size) + self.assertFalse(client._active_instances) # next calls should trigger ping and warm with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_mock: - await client.register_instance("instance-2") + # new instance should trigger ping and warm + await client.register_instance("instance-1") self.assertEqual(ping_mock.call_count, pool_size) - await client.register_instance("instance-3") + await client.register_instance("instance-2") self.assertEqual(ping_mock.call_count, pool_size * 2) # duplcate instances should not trigger ping and warm - await client.register_instance("instance-3") + await client.register_instance("instance-2") self.assertEqual(ping_mock.call_count, pool_size * 2) - @pytest.mark.asyncio async def test_remove_instance_registration(self): client = self._make_one(project="project-id") await client.register_instance("instance-1") @@ -317,7 +279,6 @@ async def test_remove_instance_registration(self): self.assertFalse(success) self.assertEqual(len(client._active_instances), 1) - @pytest.mark.asyncio async def test_get_table(self): from google.cloud.bigtable.client import Table client = self._make_one(project="project-id") @@ -333,15 +294,22 @@ async def test_get_table(self): self.assertEqual(table.app_profile_id, expected_app_profile_id) self.assertIs(table.client, client) - def test_get_table_no_loop(self): - client = self._make_one(project="project-id") - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client.get_table("instance-id", "table-id") - with self.assertWarns(Warning) as cm: - client.get_table("instance-id", "table-id") - self.assertIn("Table should be created in an asyncio event loop", str(cm.warning)) +class TestBigtableDataClientSync(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + with self.assertRaises(RuntimeError) as err: + self._make_one(project="project-id") + self.assertEqual(str(err.exception), "BigtableDataClient must be created within an async context") class TestTable(unittest.TestCase): From 5c86f57428e06978b05e87a9f0abbfba3f061447 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 12:16:16 -0700 Subject: [PATCH 110/349] reordered some things --- google/cloud/bigtable/client.py | 36 ++++++++++++++++----------------- tests/unit/test_client.py | 12 ++++++++++- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 727fc8460..7722db34b 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -86,11 +86,7 @@ def __init__( raise RuntimeError( f"{self.__class__.__name__} must be created within an async context" ) from e - super(BigtableDataClient, self).__init__( - project=project, - credentials=credentials, - client_options=client_options, - ) + # parse inputs if type(client_options) is dict: client_options = google.api_core.client_options.from_dict(client_options) if client_options is None: @@ -108,21 +104,25 @@ def __init__( ) if api_key_value and 
hasattr(google.auth._default, "get_api_key_credentials"): credentials = google.auth._default.get_api_key_credentials(api_key_value) - + # create client and transport objects + super(BigtableDataClient, self).__init__( + project=project, + credentials=credentials, + client_options=client_options, + ) self.transport = PooledBigtableGrpcAsyncIOTransport( - pool_size=pool_size, - credentials=credentials, - credentials_file=client_options.credentials_file, - host=api_endpoint, - scopes=client_options.scopes, - client_cert_source_for_mtls=client_cert_source_func, - quota_project_id=client_options.quota_project_id, - client_info=DEFAULT_CLIENT_INFO, - always_use_jwt_access=True, - api_audience=client_options.api_audience, - ) - self._gapic_client = BigtableAsyncClient( + pool_size=pool_size, credentials=credentials, + credentials_file=client_options.credentials_file, + host=api_endpoint, + scopes=client_options.scopes, + client_cert_source_for_mtls=client_cert_source_func, + quota_project_id=client_options.quota_project_id, + client_info=DEFAULT_CLIENT_INFO, + always_use_jwt_access=True, + api_audience=client_options.api_audience, + ) + self._gapic_client = BigtableAsyncClient( transport=self.transport, client_options=client_options, ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index fee10da6f..6253472fd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -17,6 +17,8 @@ import unittest import grpc import asyncio +from google.api_core.client_options import ClientOptions +from google.auth.credentials import AnonymousCredentials # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -34,13 +36,14 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) - @pytest.mark.asyncio async def test_ctor(self): expected_project = "project-id" expected_pool_size = 11 expected_metadata = [("a", "b")] + expected_credentials = AnonymousCredentials() client = self._make_one( project="project-id", pool_size=expected_pool_size, metadata=expected_metadata, + credentials=expected_credentials ) await asyncio.sleep(0.1) self.assertEqual(client.project, expected_project) @@ -48,6 +51,13 @@ async def test_ctor(self): self.assertEqual(client.metadata, expected_metadata) self.assertFalse(client._active_instances) self.assertEqual(len(client._channel_refresh_tasks), expected_pool_size) + self.assertEqual(client.transport._credentials, expected_credentials) + + async def test_ctor_client_options(self): + pass + + async def test_ctor_client_options_dict(self): + pass async def test_channel_pool_creation(self): From 3bc41317652ddd2c59706b490f4f6b1b8777e6f9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 14:05:30 -0700 Subject: [PATCH 111/349] simpified client setup --- google/cloud/bigtable/client.py | 63 +++++++++++---------------------- tests/unit/test_client.py | 13 ++++--- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 7722db34b..2076e1b8e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -21,13 +21,16 @@ import grpc import time import warnings +import functools +from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable_v2.services.bigtable.async_client import DEFAULT_CLIENT_INFO from 
google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) from google.cloud.client import ClientWithProject +from google.cloud.client import _ClientProjectMixin from google.api_core.exceptions import GoogleAPICallError @@ -45,8 +48,13 @@ from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule +def create_partial_transport(pool_size=3): + class PartialTransport(PooledBigtableGrpcAsyncIOTransport): + __init__ = functools.partialmethod(PooledBigtableGrpcAsyncIOTransport.__init__, pool_size=pool_size) + return PartialTransport + +class BigtableDataClient(BigtableAsyncClient, _ClientProjectMixin): -class BigtableDataClient(ClientWithProject): def __init__( self, *, @@ -86,46 +94,18 @@ def __init__( raise RuntimeError( f"{self.__class__.__name__} must be created within an async context" ) from e - # parse inputs - if type(client_options) is dict: - client_options = google.api_core.client_options.from_dict(client_options) - if client_options is None: - client_options = client_options_lib.ClientOptions() - client_options = cast(client_options_lib.ClientOptions, client_options) - - api_endpoint, client_cert_source_func = BigtableAsyncClient.get_mtls_endpoint_and_cert_source( - client_options - ) - - api_key_value = getattr(client_options, "api_key", None) - if api_key_value and credentials: - raise ValueError( - "client_options.api_key and credentials are mutually exclusive" - ) - if api_key_value and hasattr(google.auth._default, "get_api_key_credentials"): - credentials = google.auth._default.get_api_key_credentials(api_key_value) - # create client and transport objects - super(BigtableDataClient, self).__init__( - project=project, + # set up transport in registry + PartialTransport = create_partial_transport(pool_size) + transport_str = f"pooled_grpc_asyncio_{pool_size}" + BigtableClientMeta._transport_registry[transport_str] = PartialTransport + # initialize client + _ClientProjectMixin.__init__(self, project=project, credentials=credentials) + BigtableAsyncClient.__init__( + self, + transport=transport_str, credentials=credentials, client_options=client_options, ) - self.transport = PooledBigtableGrpcAsyncIOTransport( - pool_size=pool_size, - credentials=credentials, - credentials_file=client_options.credentials_file, - host=api_endpoint, - scopes=client_options.scopes, - client_cert_source_for_mtls=client_cert_source_func, - quota_project_id=client_options.quota_project_id, - client_info=DEFAULT_CLIENT_INFO, - always_use_jwt_access=True, - api_audience=client_options.api_audience, - ) - self._gapic_client = BigtableAsyncClient( - transport=self.transport, - client_options=client_options, - ) self.metadata = metadata # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() @@ -212,7 +192,7 @@ async def register_instance(self, instance_id: str): requests, and new channels will be warmed for each registered instance Channels will not be refreshed unless at least one instance is registered """ - instance_name = self._gapic_client.instance_path(self.project, instance_id) + instance_name = self.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) # call ping and warm on all existing channels @@ -231,14 +211,14 @@ async def remove_instance_registration(self, instance_id: str) -> bool: Returns: - True if instance was removed """ - 
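create_partial_transport above pins pool_size by binding a constructor argument with functools.partialmethod, so the resulting subclass can be registered under a transport name and constructed with the standard transport signature. A toy illustration of the same trick; Pool and with_fixed_size here are made-up names, not the real transport API.

    from functools import partialmethod

    class Pool:
        def __init__(self, host, pool_size=3):
            self.host = host
            self.pool_size = pool_size

    def with_fixed_size(base, pool_size):
        # subclass whose __init__ has pool_size pre-bound
        class Fixed(base):
            __init__ = partialmethod(base.__init__, pool_size=pool_size)
        Fixed.__name__ = f"{base.__name__}_{pool_size}"
        Fixed.__qualname__ = Fixed.__name__
        return Fixed

    FivePool = with_fixed_size(Pool, 5)
    assert FivePool("localhost").pool_size == 5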
instance_name = self._gapic_client.instance_path(self.project, instance_id) + instance_name = self.instance_path(self.project, instance_id) try: self._active_instances.remove(instance_name) return True except KeyError: return False - async def get_table( + def get_table( self, instance_id: str, table_id: str, @@ -255,7 +235,6 @@ async def get_table( app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles """ - await self.register_instance(instance_id) return Table(self, instance_id, table_id, app_profile_id) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6253472fd..f4cfb2d1f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -61,8 +61,9 @@ async def test_ctor_client_options_dict(self): async def test_channel_pool_creation(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport pool_size = 14 - with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: client = self._make_one(project="project-id", pool_size=pool_size) self.assertEqual(create_channel.call_count, pool_size) # channels should be unique @@ -173,13 +174,14 @@ async def test__manage_channel_first_sleep(self): msg=f"params={params}") async def test__manage_channel_ping_and_warm(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport # should ping an warm all new channels, and old channels if sleeping client = self._make_one(project="project-id") new_channel = grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep") as sleep: - with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: create_channel.return_value = new_channel - with mock.patch.object(type(self._make_one().transport), "replace_channel") as replace_channel: + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "replace_channel") as replace_channel: replace_channel.side_effect = asyncio.CancelledError # should ping and warm old channel then new if sleep > 0 with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_and_warm: @@ -235,17 +237,18 @@ async def test__manage_channel_refresh(self): # make sure that channels are properly refreshed from collections import namedtuple import time + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport expected_grace = 9 expected_refresh = 0.5 channel_idx = 1 new_channel = grpc.aio.insecure_channel("localhost:8080") for num_cycles in [0, 1, 10, 100]: - with mock.patch.object(type(self._make_one().transport), "replace_channel") as replace_channel: + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "replace_channel") as replace_channel: with mock.patch.object(asyncio, "sleep") as sleep: sleep.side_effect = [None for i in range(num_cycles)] + [asyncio.CancelledError] client = self._make_one(project="project-id") - with mock.patch.object(type(self._make_one().transport), "create_channel") as create_channel: + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: create_channel.return_value = new_channel try: await 
client._manage_channel(channel_idx, refresh_interval=expected_refresh, grace_period=expected_grace) From 3c4e0b686bb9f330d1439331c1179f94d26dee1a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 14:30:33 -0700 Subject: [PATCH 112/349] added test for veneer headers --- google/cloud/bigtable/client.py | 4 ++++ tests/unit/test_client.py | 27 ++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2076e1b8e..a82373ecb 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -98,6 +98,9 @@ def __init__( PartialTransport = create_partial_transport(pool_size) transport_str = f"pooled_grpc_asyncio_{pool_size}" BigtableClientMeta._transport_registry[transport_str] = PartialTransport + # set up client info headers for veneer library + client_info = DEFAULT_CLIENT_INFO + client_info.client_library_version = client_info.gapic_version # initialize client _ClientProjectMixin.__init__(self, project=project, credentials=credentials) BigtableAsyncClient.__init__( @@ -105,6 +108,7 @@ def __init__( transport=transport_str, credentials=credentials, client_options=client_options, + client_info=client_info, ) self.metadata = metadata # keep track of active instances to for warmup on channel refresh diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f4cfb2d1f..387ea0eaa 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -17,6 +17,8 @@ import unittest import grpc import asyncio +import re + from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials # try/except added for compatibility with python < 3.8 @@ -26,6 +28,10 @@ except ImportError: # pragma: NO COVER import mock +VENEER_HEADER_REGEX = re.compile( + r"gapic\/[0-9]+\.[\w.-]+ gax\/[0-9]+\.[\w.-]+ gccl\/[0-9]+\.[\w.-]+ gl-python\/[0-9]+\.[\w.-]+ grpc\/[0-9]+\.[\w.-]+" +) + class TestBigtableDataClientAsync(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): @@ -53,11 +59,26 @@ async def test_ctor(self): self.assertEqual(len(client._channel_refresh_tasks), expected_pool_size) self.assertEqual(client.transport._credentials, expected_credentials) - async def test_ctor_client_options(self): + async def test_ctor_super_inits(self): pass - async def test_ctor_client_options_dict(self): - pass + async def test_veneer_grpc_headers(self): + # client_info should be populated with headers to + # detect as a veneer client + patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") + with patch as gapic_mock: + self._make_one(project="project-id") + wrapped_call_list = gapic_mock.call_args_list + self.assertGreater(len(wrapped_call_list), 0) + # each wrapped call should have veneer headers + for call in wrapped_call_list: + client_info = call.kwargs["client_info"] + self.assertIsNotNone(client_info, f"{call} has no client_info") + wrapped_user_agent_sorted = " ".join( + sorted(client_info.to_user_agent().split(" ")) + ) + self.assertTrue(VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), + f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}") async def test_channel_pool_creation(self): From 197bf950ee9c19d980e5c2f9312dd69e42038ccb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 14:43:16 -0700 Subject: [PATCH 113/349] added super init test --- tests/unit/test_client.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff 
--git a/tests/unit/test_client.py b/tests/unit/test_client.py index 387ea0eaa..193be737f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,7 +60,32 @@ async def test_ctor(self): self.assertEqual(client.transport._credentials, expected_credentials) async def test_ctor_super_inits(self): - pass + from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.client import _ClientProjectMixin + + project = "project-id" + pool_size = 11 + credentials = AnonymousCredentials() + client_options = {"api_endpoint": "foo.bar:1234"} + metadata = [("a", "b")] + transport_str = f"pooled_grpc_asyncio_{pool_size}" + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + with mock.patch.object(_ClientProjectMixin, "__init__") as client_project_mixin_init: + try: + self._make_one(project=project, pool_size=pool_size, credentials=credentials, client_options=client_options, metadata=metadata) + except AttributeError: + pass + # test gapic superclass init was called + self.assertEqual(bigtable_client_init.call_count, 1) + kwargs = bigtable_client_init.call_args[1] + self.assertEqual(kwargs["transport"], transport_str) + self.assertEqual(kwargs["credentials"], credentials) + self.assertEqual(kwargs["client_options"], client_options) + # test mixin superclass init was called + self.assertEqual(client_project_mixin_init.call_count, 1) + kwargs = client_project_mixin_init.call_args[1] + self.assertEqual(kwargs["project"], project) + self.assertEqual(kwargs["credentials"], credentials) async def test_veneer_grpc_headers(self): # client_info should be populated with headers to From 9d8122b162f87a6b362f2d28d1a69dd3a14294bb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 15:07:27 -0700 Subject: [PATCH 114/349] added subclass generator to gapic template --- gapic-generator-fork | 2 +- google/cloud/bigtable/client.py | 9 ++------- .../bigtable/transports/pooled_grpc_asyncio.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index cdda28398..7e82e57be 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit cdda28398f0d8e6c9f85a750cb684d56f64c0cc9 +Subproject commit 7e82e57bee9a7071877772f77adb481a4d650750 diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a82373ecb..896e91e7e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -48,11 +48,6 @@ from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule -def create_partial_transport(pool_size=3): - class PartialTransport(PooledBigtableGrpcAsyncIOTransport): - __init__ = functools.partialmethod(PooledBigtableGrpcAsyncIOTransport.__init__, pool_size=pool_size) - return PartialTransport - class BigtableDataClient(BigtableAsyncClient, _ClientProjectMixin): def __init__( @@ -95,9 +90,9 @@ def __init__( f"{self.__class__.__name__} must be created within an async context" ) from e # set up transport in registry - PartialTransport = create_partial_transport(pool_size) transport_str = f"pooled_grpc_asyncio_{pool_size}" - BigtableClientMeta._transport_registry[transport_str] = PartialTransport + transport = PooledBigtableGrpcAsyncIOTransport.with_fixed_size(pool_size) + BigtableClientMeta._transport_registry[transport_str] = transport # set up client info headers for veneer library client_info = 
DEFAULT_CLIENT_INFO client_info.client_library_version = client_info.gapic_version diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index e91898435..71388f3a6 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -15,6 +15,7 @@ # import asyncio import warnings +from functools import partialmethod from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple, Union, List from google.api_core import gapic_v1 @@ -47,6 +48,22 @@ class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): when making requests. Channels are rotated in a round-robin fashion. """ + @classmethod + def with_fixed_size(cls, pool_size) -> "PooledBigtableGrpcAsyncIOTransport": + """ + Creates a new class with a fixed channel pool size. + + A fixed channel pool makes compatibility with other transports easier, + as the initializer signature is the same. + """ + + class PooledTransportFixed(cls): + __init__ = partialmethod(cls.__init__, pool_size=pool_size) + + PooledTransportFixed.__name__ = f"{cls.__name__}_{pool_size}" + PooledTransportFixed.__qualname__ = PooledTransportFixed.__name__ + return PooledTransportFixed + @classmethod def create_channel( cls, From 2632b701ce9a69a2dccaf998ed5fabdd0eee3e45 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 15:39:49 -0700 Subject: [PATCH 115/349] finished table tests --- google/cloud/bigtable/client.py | 33 ++++++++++----- tests/unit/test_client.py | 72 +++++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 896e91e7e..97f0136f0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -81,14 +81,10 @@ def __init__( Client options used to set user options on the client. API Endpoint should be set through client_options. metadata: a list of metadata headers to be attached to all calls with this client + Raises: + - RuntimeError if called outside of an async run loop context + - ValueError if pool_size is less than 1 """ - # check for active run loop - try: - asyncio.get_running_loop() - except RuntimeError as e: - raise RuntimeError( - f"{self.__class__.__name__} must be created within an async context" - ) from e # set up transport in registry transport_str = f"pooled_grpc_asyncio_{pool_size}" transport = PooledBigtableGrpcAsyncIOTransport.with_fixed_size(pool_size) @@ -98,6 +94,7 @@ def __init__( client_info.client_library_version = client_info.gapic_version # initialize client _ClientProjectMixin.__init__(self, project=project, credentials=credentials) + # raises RuntimeError if called outside of an async run loop context BigtableAsyncClient.__init__( self, transport=transport_str, @@ -222,6 +219,7 @@ def get_table( instance_id: str, table_id: str, app_profile_id: str | None = None, + metadata: list[tuple[str, str]] | None = None, ) -> Table: """ Returns a table instance for making data API requests @@ -233,8 +231,9 @@ def get_table( table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. 
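The `with_fixed_size` helper shown above bakes the pool size into a generated subclass by assigning a `functools.partialmethod` of the parent `__init__`, so the fixed-size transport keeps the same constructor signature as the other registered transports. A stripped-down sketch of the same pattern on a stand-in class (the names below are illustrative, not part of the library):

```python
# Illustrative stand-in, not the real transport: partialmethod freezes a
# constructor keyword while leaving the visible signature unchanged.
from functools import partialmethod


class PooledThing:
    def __init__(self, host: str, pool_size: int = 3):
        self.host = host
        self.pool = [f"channel-{i}" for i in range(pool_size)]

    @classmethod
    def with_fixed_size(cls, pool_size: int):
        class Fixed(cls):
            __init__ = partialmethod(cls.__init__, pool_size=pool_size)

        Fixed.__name__ = f"{cls.__name__}_{pool_size}"
        return Fixed


FixedFive = PooledThing.with_fixed_size(5)
assert len(FixedFive("localhost").pool) == 5  # pool_size is baked in
```

Because `partialmethod` is a descriptor, the frozen keyword is applied on every instantiation while positional arguments pass through unchanged, which is what lets the fixed-size class slot into the transport registry alongside single-argument transports.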
https://cloud.google.com/bigtable/docs/app-profiles + metadata: a list of metadata headers to be attached to all calls with this client """ - return Table(self, instance_id, table_id, app_profile_id) + return Table(self, instance_id, table_id, app_profile_id, metadata) class Table: @@ -251,17 +250,31 @@ def __init__( instance_id: str, table_id: str, app_profile_id: str | None = None, + metadata: list[tuple[str, str]] | None = None, ): """ Initialize a Table instance - Tables are not meant to be instantiated directly, but are returned by - `BigtableDataClient.get_table` + Must be created within an async run loop context + + Args: + instance_id: The Bigtable instance ID to associate with this client + instance_id is combined with the client's project to fully + specify the instance + table_id: The ID of the table. + app_profile_id: (Optional) The app profile to associate with requests. + https://cloud.google.com/bigtable/docs/app-profiles + metadata: a list of metadata headers to be attached to all calls with this client + Raises: + - RuntimeError if called outside of an async run loop context """ self.client = client self.instance = instance_id self.table_id = table_id self.app_profile_id = app_profile_id + self.metadata = metadata + # raises RuntimeError if called outside of an async run loop context + self._register_instance_task = asyncio.create_task(self.client.register_instance(instance_id)) async def read_rows_stream( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 193be737f..902032a80 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -341,42 +341,80 @@ async def test_remove_instance_registration(self): async def test_get_table(self): from google.cloud.bigtable.client import Table client = self._make_one(project="project-id") + self.assertFalse(client._active_instances) expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" - with mock.patch.object(type(self._make_one()), "register_instance") as register_instance: - table = client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id) - register_instance.assert_called_once_with(expected_instance_id) + expected_metadata = [('a', 'b')] + table = client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id, expected_metadata) + await asyncio.sleep(0) self.assertIsInstance(table, Table) self.assertEqual(table.table_id, expected_table_id) self.assertEqual(table.instance, expected_instance_id) self.assertEqual(table.app_profile_id, expected_app_profile_id) + self.assertEqual(table.metadata, expected_metadata) self.assertIs(table.client, client) + full_instance_name = client.instance_path(client.project, expected_instance_id) + self.assertIn(full_instance_name, client._active_instances) + async def test_multiple_pool_sizes(self): + # should be able to create multiple clients with different pool sizes without issue + pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] + for pool_size in pool_sizes: + client = self._make_one(project="project-id", pool_size=pool_size) + self.assertEqual(len(client._channel_refresh_tasks), pool_size) + client_duplicate = self._make_one(project="project-id", pool_size=pool_size) + self.assertEqual(len(client_duplicate._channel_refresh_tasks), pool_size) + self.assertIn(str(pool_size), str(client.transport)) -class TestBigtableDataClientSync(unittest.TestCase): - @staticmethod - def _get_target_class(): - from google.cloud.bigtable.client import BigtableDataClient - 
return BigtableDataClient +class TestSyncInitialization(unittest.TestCase): - def _make_one(self, *args, **kwargs): - return self._get_target_class()(*args, **kwargs) - - def test_ctor_sync(self): + def test_client_ctor_sync(self): # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import BigtableDataClient with self.assertRaises(RuntimeError) as err: - self._make_one(project="project-id") - self.assertEqual(str(err.exception), "BigtableDataClient must be created within an async context") + BigtableDataClient(project="project-id") + self.assertIn("no current event loop", str(err.exception)) -class TestTable(unittest.TestCase): + def test_table_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import Table + client = mock.Mock() + with self.assertRaises(RuntimeError) as err: + Table(client, "instance-id", "table-id") + self.assertEqual(str(err.exception), "no running event loop") + +class TestTable(unittest.IsolatedAsyncioTestCase): def _make_one(self, *args, **kwargs): from google.cloud.bigtable.client import BigtableDataClient return BigtableDataClient().get_table(*args, **kwargs) - def test_ctor(self): - pass + async def test_ctor(self): + from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_metadata = [('a', 'b')] + client = BigtableDataClient() + self.assertFalse(client._active_instances) + + table = Table(client, expected_instance_id, expected_table_id, expected_app_profile_id, expected_metadata) + await asyncio.sleep(0) + self.assertEqual(table.table_id, expected_table_id) + self.assertEqual(table.instance, expected_instance_id) + self.assertEqual(table.app_profile_id, expected_app_profile_id) + self.assertEqual(table.metadata, expected_metadata) + self.assertIs(table.client, client) + full_instance_name = client.instance_path(client.project, expected_instance_id) + self.assertIn(full_instance_name, client._active_instances) + # ensure task reaches completion + await table._register_instance_task + self.assertTrue(table._register_instance_task.done()) + self.assertFalse(table._register_instance_task.cancelled()) + self.assertIsNone(table._register_instance_task.exception()) + From d4e052b5f671aeaaec8d1a0c9acc3c14c59a6cd1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 15:42:02 -0700 Subject: [PATCH 116/349] ran blacken --- google/cloud/bigtable/client.py | 7 +- tests/unit/test_client.py | 181 +++++++++++++++++++++++--------- 2 files changed, 138 insertions(+), 50 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 97f0136f0..5c059198b 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -48,8 +48,8 @@ from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule -class BigtableDataClient(BigtableAsyncClient, _ClientProjectMixin): +class BigtableDataClient(BigtableAsyncClient, _ClientProjectMixin): def __init__( self, *, @@ -112,7 +112,6 @@ def __init__( refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) self._channel_refresh_tasks.append(refresh_task) - async def _ping_and_warm_instances( self, channel: grpc.aio.Channel ) -> list[GoogleAPICallError | None]: @@ -274,7 +273,9 @@ def __init__( 
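One detail worth noting in the tests above: `Table.__init__` only schedules `register_instance` via `asyncio.create_task`, so the tests `await asyncio.sleep(0)` to give the event loop one pass before asserting on `_active_instances`, and they keep the task handle so a failed registration can be surfaced later. A self-contained sketch of that scheduling behaviour, using toy stand-ins rather than the real client:

```python
# Toy stand-ins showing why the tests yield once before asserting and keep
# a handle on the registration task to check it finished cleanly.
import asyncio


class ToyClient:
    def __init__(self):
        self.active_instances = set()

    async def register_instance(self, instance_id: str):
        self.active_instances.add(instance_id)


class ToyTable:
    def __init__(self, client: ToyClient, instance_id: str):
        # create_task raises RuntimeError when no event loop is running,
        # matching the sync-context tests in the patch above.
        self._register_instance_task = asyncio.create_task(
            client.register_instance(instance_id)
        )


async def main():
    client = ToyClient()
    table = ToyTable(client, "instance-id")
    assert not client.active_instances        # task scheduled, not yet run
    await asyncio.sleep(0)                    # yield so the task can execute
    assert "instance-id" in client.active_instances
    await table._register_instance_task       # surfaces any exception
    assert table._register_instance_task.exception() is None


asyncio.run(main())
```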
self.app_profile_id = app_profile_id self.metadata = metadata # raises RuntimeError if called outside of an async run loop context - self._register_instance_task = asyncio.create_task(self.client.register_instance(instance_id)) + self._register_instance_task = asyncio.create_task( + self.client.register_instance(instance_id) + ) async def read_rows_stream( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 902032a80..a1f4a7ac6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -21,6 +21,7 @@ from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials + # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -32,6 +33,7 @@ r"gapic\/[0-9]+\.[\w.-]+ gax\/[0-9]+\.[\w.-]+ gccl\/[0-9]+\.[\w.-]+ gl-python\/[0-9]+\.[\w.-]+ grpc\/[0-9]+\.[\w.-]+" ) + class TestBigtableDataClientAsync(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): @@ -48,8 +50,10 @@ async def test_ctor(self): expected_metadata = [("a", "b")] expected_credentials = AnonymousCredentials() client = self._make_one( - project="project-id", pool_size=expected_pool_size, metadata=expected_metadata, - credentials=expected_credentials + project="project-id", + pool_size=expected_pool_size, + metadata=expected_metadata, + credentials=expected_credentials, ) await asyncio.sleep(0.1) self.assertEqual(client.project, expected_project) @@ -60,7 +64,9 @@ async def test_ctor(self): self.assertEqual(client.transport._credentials, expected_credentials) async def test_ctor_super_inits(self): - from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) from google.cloud.client import _ClientProjectMixin project = "project-id" @@ -70,9 +76,17 @@ async def test_ctor_super_inits(self): metadata = [("a", "b")] transport_str = f"pooled_grpc_asyncio_{pool_size}" with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - with mock.patch.object(_ClientProjectMixin, "__init__") as client_project_mixin_init: + with mock.patch.object( + _ClientProjectMixin, "__init__" + ) as client_project_mixin_init: try: - self._make_one(project=project, pool_size=pool_size, credentials=credentials, client_options=client_options, metadata=metadata) + self._make_one( + project=project, + pool_size=pool_size, + credentials=credentials, + client_options=client_options, + metadata=metadata, + ) except AttributeError: pass # test gapic superclass init was called @@ -102,14 +116,20 @@ async def test_veneer_grpc_headers(self): wrapped_user_agent_sorted = " ".join( sorted(client_info.to_user_agent().split(" ")) ) - self.assertTrue(VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), - f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}") - + self.assertTrue( + VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), + f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}", + ) async def test_channel_pool_creation(self): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + pool_size = 14 - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, 
"create_channel" + ) as create_channel: client = self._make_one(project="project-id", pool_size=pool_size) self.assertEqual(create_channel.call_count, pool_size) # channels should be unique @@ -124,25 +144,32 @@ async def test_channel_pool_rotation(self): self.assertEqual(len(client.transport.channel_pool), pool_size) with mock.patch.object(type(client.transport), "next_channel") as next_channel: - with mock.patch.object(type(client.transport.channel_pool[0]), "unary_unary") as unary_unary: + with mock.patch.object( + type(client.transport.channel_pool[0]), "unary_unary" + ) as unary_unary: # calling an rpc `pool_size` times should use a different channel each time for i in range(pool_size): - channel_1 = client.transport.channel_pool[client.transport._next_idx] + channel_1 = client.transport.channel_pool[ + client.transport._next_idx + ] next_channel.return_value = channel_1 client.transport.ping_and_warm() self.assertEqual(next_channel.call_count, i + 1) channel_1.unary_unary.assert_called_once() - async def test_channel_pool_replace(self): pool_size = 7 client = self._make_one(project="project-id", pool_size=pool_size) for replace_idx in range(pool_size): start_pool = [channel for channel in client.transport.channel_pool] grace_period = 9 - with mock.patch.object(type(client.transport.channel_pool[0]), "close") as close: + with mock.patch.object( + type(client.transport.channel_pool[0]), "close" + ) as close: new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel(replace_idx, grace=grace_period, new_channel=new_channel) + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) close.assert_called_once_with(grace=grace_period) close.assert_awaited_once() self.assertEqual(client.transport.channel_pool[replace_idx], new_channel) @@ -178,7 +205,12 @@ async def test__ping_and_warm_instances(self): self.assertFalse(gather.call_args.args) self.assertEqual(gather.call_args.kwargs, {"return_exceptions": True}) # test with instances - client._active_instances = ["instance-1", "instance-2", "instance-3", "instance-4"] + client._active_instances = [ + "instance-1", + "instance-2", + "instance-3", + "instance-4", + ] gather = AsyncMock() asyncio.gather = gather await client._ping_and_warm_instances(channel) @@ -194,7 +226,10 @@ async def test__manage_channel_first_sleep(self): # first sleep time should be `refresh_interval` seconds after client init import time from collections import namedtuple - params = namedtuple('params', ['refresh_interval', 'wait_time', 'expected_sleep']) + + params = namedtuple( + "params", ["refresh_interval", "wait_time", "expected_sleep"] + ) test_params = [ params(refresh_interval=0, wait_time=0, expected_sleep=0), params(refresh_interval=0, wait_time=1, expected_sleep=0), @@ -216,21 +251,31 @@ async def test__manage_channel_first_sleep(self): pass sleep.assert_called_once() call_time = sleep.call_args[0][0] - self.assertAlmostEqual(call_time, expected_sleep, delta=0.1, - msg=f"params={params}") + self.assertAlmostEqual( + call_time, expected_sleep, delta=0.1, msg=f"params={params}" + ) async def test__manage_channel_ping_and_warm(self): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + # should ping an warm all new channels, and old channels if sleeping client = 
self._make_one(project="project-id") new_channel = grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep") as sleep: - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: create_channel.return_value = new_channel - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "replace_channel") as replace_channel: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: replace_channel.side_effect = asyncio.CancelledError # should ping and warm old channel then new if sleep > 0 - with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_and_warm: + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: try: channel_idx = 2 old_channel = client.transport.channel_pool[channel_idx] @@ -239,13 +284,17 @@ async def test__manage_channel_ping_and_warm(self): pass self.assertEqual(ping_and_warm.call_count, 2) self.assertNotEqual(old_channel, new_channel) - called_with = [call[0][0] for call in ping_and_warm.call_args_list] + called_with = [ + call[0][0] for call in ping_and_warm.call_args_list + ] self.assertIn(old_channel, called_with) self.assertIn(new_channel, called_with) # should ping and warm instantly new channel only if not sleeping - with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_and_warm: + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: try: - await client._manage_channel(0, 0) + await client._manage_channel(0, 0) except asyncio.CancelledError: pass ping_and_warm.assert_called_once_with(new_channel) @@ -254,9 +303,12 @@ async def test__manage_channel_sleeps(self): # make sure that sleeps work as expected from collections import namedtuple import time - params = namedtuple('params', ['refresh_interval', 'num_cycles', 'expected_sleep']) + + params = namedtuple( + "params", ["refresh_interval", "num_cycles", "expected_sleep"] + ) test_params = [ - params(refresh_interval=None, num_cycles=1, expected_sleep=60*45), + params(refresh_interval=None, num_cycles=1, expected_sleep=60 * 45), params(refresh_interval=10, num_cycles=10, expected_sleep=100), params(refresh_interval=10, num_cycles=1, expected_sleep=10), ] @@ -265,7 +317,9 @@ async def test__manage_channel_sleeps(self): time.return_value = 0 for refresh_interval, num_cycles, expected_sleep in test_params: with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles-1)] + [asyncio.CancelledError] + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] try: client = self._make_one(project="project-id") if refresh_interval is not None: @@ -276,31 +330,48 @@ async def test__manage_channel_sleeps(self): pass self.assertEqual(sleep.call_count, num_cycles) total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - self.assertAlmostEqual(total_sleep, expected_sleep, delta=0.1, - msg=f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}") + self.assertAlmostEqual( + total_sleep, + expected_sleep, + delta=0.1, + msg=f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}", + ) async def test__manage_channel_refresh(self): # make sure that channels are properly refreshed from collections import namedtuple 
import time - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + expected_grace = 9 expected_refresh = 0.5 channel_idx = 1 new_channel = grpc.aio.insecure_channel("localhost:8080") for num_cycles in [0, 1, 10, 100]: - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "replace_channel") as replace_channel: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles)] + [asyncio.CancelledError] + sleep.side_effect = [None for i in range(num_cycles)] + [ + asyncio.CancelledError + ] client = self._make_one(project="project-id") - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "create_channel") as create_channel: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: create_channel.return_value = new_channel try: - await client._manage_channel(channel_idx, refresh_interval=expected_refresh, grace_period=expected_grace) + await client._manage_channel( + channel_idx, + refresh_interval=expected_refresh, + grace_period=expected_grace, + ) except asyncio.CancelledError: pass - self.assertEqual(sleep.call_count, num_cycles+1) + self.assertEqual(sleep.call_count, num_cycles + 1) self.assertEqual(create_channel.call_count, num_cycles) self.assertEqual(replace_channel.call_count, num_cycles) for call in replace_channel.call_args_list: @@ -315,7 +386,9 @@ async def test_register_instance_ping_and_warm(self): self.assertEqual(len(client._channel_refresh_tasks), pool_size) self.assertFalse(client._active_instances) # next calls should trigger ping and warm - with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_mock: + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_mock: # new instance should trigger ping and warm await client.register_instance("instance-1") self.assertEqual(ping_mock.call_count, pool_size) @@ -333,20 +406,28 @@ async def test_remove_instance_registration(self): success = await client.remove_instance_registration("instance-1") self.assertTrue(success) self.assertEqual(len(client._active_instances), 1) - self.assertEqual(client._active_instances, {"projects/project-id/instances/instance-2"}) + self.assertEqual( + client._active_instances, {"projects/project-id/instances/instance-2"} + ) success = await client.remove_instance_registration("nonexistant") self.assertFalse(success) self.assertEqual(len(client._active_instances), 1) async def test_get_table(self): from google.cloud.bigtable.client import Table + client = self._make_one(project="project-id") self.assertFalse(client._active_instances) expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" - expected_metadata = [('a', 'b')] - table = client.get_table(expected_instance_id, expected_table_id, expected_app_profile_id, expected_metadata) + expected_metadata = [("a", "b")] + table = client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + expected_metadata, + ) await asyncio.sleep(0) self.assertIsInstance(table, Table) self.assertEqual(table.table_id, expected_table_id) @@ -369,25 +450,25 @@ async def test_multiple_pool_sizes(self): class 
TestSyncInitialization(unittest.TestCase): - def test_client_ctor_sync(self): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import BigtableDataClient + with self.assertRaises(RuntimeError) as err: BigtableDataClient(project="project-id") self.assertIn("no current event loop", str(err.exception)) - def test_table_ctor_sync(self): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import Table + client = mock.Mock() with self.assertRaises(RuntimeError) as err: Table(client, "instance-id", "table-id") self.assertEqual(str(err.exception), "no running event loop") -class TestTable(unittest.IsolatedAsyncioTestCase): +class TestTable(unittest.IsolatedAsyncioTestCase): def _make_one(self, *args, **kwargs): from google.cloud.bigtable.client import BigtableDataClient @@ -396,14 +477,21 @@ def _make_one(self, *args, **kwargs): async def test_ctor(self): from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.client import Table + expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" - expected_metadata = [('a', 'b')] + expected_metadata = [("a", "b")] client = BigtableDataClient() self.assertFalse(client._active_instances) - table = Table(client, expected_instance_id, expected_table_id, expected_app_profile_id, expected_metadata) + table = Table( + client, + expected_instance_id, + expected_table_id, + expected_app_profile_id, + expected_metadata, + ) await asyncio.sleep(0) self.assertEqual(table.table_id, expected_table_id) self.assertEqual(table.instance, expected_instance_id) @@ -417,4 +505,3 @@ async def test_ctor(self): self.assertTrue(table._register_instance_task.done()) self.assertFalse(table._register_instance_task.cancelled()) self.assertIsNone(table._register_instance_task.exception()) - From 1aa694b6f61d19cfbe9b56546d7034feb359a298 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 16:06:26 -0700 Subject: [PATCH 117/349] got tests working --- google/cloud/bigtable/client.py | 31 ++++++++++++++++++------------- tests/unit/test_client.py | 30 ++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5c059198b..73cb9ad8f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,13 +15,11 @@ from __future__ import annotations -from typing import cast, Any, AsyncIterable, Optional, Set, TYPE_CHECKING +from typing import cast, Any, Optional, AsyncIterable, Set, TYPE_CHECKING import asyncio import grpc import time -import warnings -import functools from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -29,7 +27,6 @@ from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) -from google.cloud.client import ClientWithProject from google.cloud.client import _ClientProjectMixin from google.api_core.exceptions import GoogleAPICallError @@ -92,6 +89,12 @@ def __init__( # set up client info headers for veneer library client_info = DEFAULT_CLIENT_INFO client_info.client_library_version = client_info.gapic_version + # parse client options + if type(client_options) is dict: + client_options = client_options_lib.from_dict(client_options) + client_options = cast( + 
Optional[client_options_lib.ClientOptions], client_options + ) # initialize client _ClientProjectMixin.__init__(self, project=project, credentials=credentials) # raises RuntimeError if called outside of an async run loop context @@ -108,7 +111,7 @@ def __init__( # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] - for channel_idx in range(len(self.transport.channel_pool)): + for channel_idx in range(pool_size): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) self._channel_refresh_tasks.append(refresh_task) @@ -151,22 +154,23 @@ async def _manage_channel( grace_period: time to allow previous channel to serve existing requests before closing, in seconds """ + transport = cast(PooledBigtableGrpcAsyncIOTransport, self.transport) first_refresh = self._channel_init_time + refresh_interval next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately - channel = self.transport.channel_pool[channel_idx] + channel = transport.channel_pool[channel_idx] await self._ping_and_warm_instances(channel) # continuously refresh the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) # prepare new channel for use - new_channel = self.transport.create_channel( + new_channel = transport.create_channel( self.transport._host, - credentials=self.transport._credentials, - scopes=self.transport._scopes, - ssl_credentials=self.transport._ssl_channel_credentials, - quota_project_id=self.transport._quota_project_id, + credentials=transport._credentials, + scopes=transport._scopes, + ssl_credentials=transport._ssl_channel_credentials, + quota_project_id=transport._quota_project_id, options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), @@ -175,7 +179,7 @@ async def _manage_channel( await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - await self.transport.replace_channel(channel_idx, grace_period, new_channel) + await transport.replace_channel(channel_idx, grace_period, new_channel) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) @@ -187,11 +191,12 @@ async def register_instance(self, instance_id: str): requests, and new channels will be warmed for each registered instance Channels will not be refreshed unless at least one instance is registered """ + transport = cast(PooledBigtableGrpcAsyncIOTransport, self.transport) instance_name = self.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) # call ping and warm on all existing channels - for channel in self.transport.channel_pool: + for channel in transport.channel_pool: await self._ping_and_warm_instances(channel) async def remove_instance_registration(self, instance_id: str) -> bool: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a1f4a7ac6..afc4464a5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -13,13 +13,11 @@ # limitations under the License. 
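The refresh logic above runs one cycle per pooled channel: sleep until the next refresh, build and warm a replacement channel, swap it in with a grace period, then subtract the swap time from the next sleep so the cadence holds. A schematic, runnable sketch of that loop with a toy in-memory transport standing in for the pooled gRPC transport; intervals are shrunk so it finishes quickly, and the helper names are placeholders rather than the client's API:

```python
# Schematic version of the channel-refresh loop with stand-in helpers.
import asyncio
import time


class ToyTransport:
    def __init__(self, pool_size=3):
        self.channel_pool = [f"channel-{i}" for i in range(pool_size)]
        self.created = 0

    def create_channel(self):
        self.created += 1
        return f"channel-new-{self.created}"

    async def replace_channel(self, idx, grace, new_channel):
        await asyncio.sleep(0)        # real code closes the old channel after `grace`
        self.channel_pool[idx] = new_channel


async def warm(channel):
    await asyncio.sleep(0)            # real code sends PingAndWarm for each instance


async def manage_channel(transport, idx, refresh_interval, grace_period, cycles=2):
    init_time = time.time()
    next_sleep = max(init_time + refresh_interval - time.time(), 0)
    if next_sleep > 0:
        await warm(transport.channel_pool[idx])          # warm the channel in use
    for _ in range(cycles):                              # real loop runs forever
        await asyncio.sleep(next_sleep)
        new_channel = transport.create_channel()         # prepare the replacement
        await warm(new_channel)                          # warm before it takes traffic
        start = time.time()
        await transport.replace_channel(idx, grace_period, new_channel)
        next_sleep = refresh_interval - (time.time() - start)   # keep the cadence


transport = ToyTransport()
asyncio.run(manage_channel(transport, idx=1, refresh_interval=0.01, grace_period=0.0))
print(transport.channel_pool)   # index 1 now holds the latest replacement channel
```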
-import pytest import unittest import grpc import asyncio import re -from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials # try/except added for compatibility with python < 3.8 @@ -68,11 +66,13 @@ async def test_ctor_super_inits(self): BigtableAsyncClient, ) from google.cloud.client import _ClientProjectMixin + from google.api_core import client_options as client_options_lib project = "project-id" pool_size = 11 credentials = AnonymousCredentials() client_options = {"api_endpoint": "foo.bar:1234"} + options_parsed = client_options_lib.from_dict(client_options) metadata = [("a", "b")] transport_str = f"pooled_grpc_asyncio_{pool_size}" with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: @@ -84,7 +84,7 @@ async def test_ctor_super_inits(self): project=project, pool_size=pool_size, credentials=credentials, - client_options=client_options, + client_options=options_parsed, metadata=metadata, ) except AttributeError: @@ -94,13 +94,29 @@ async def test_ctor_super_inits(self): kwargs = bigtable_client_init.call_args[1] self.assertEqual(kwargs["transport"], transport_str) self.assertEqual(kwargs["credentials"], credentials) - self.assertEqual(kwargs["client_options"], client_options) + self.assertEqual(kwargs["client_options"], options_parsed) # test mixin superclass init was called self.assertEqual(client_project_mixin_init.call_count, 1) kwargs = client_project_mixin_init.call_args[1] self.assertEqual(kwargs["project"], project) self.assertEqual(kwargs["credentials"], credentials) + async def test_ctor_dict_options(self): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.api_core.client_options import ClientOptions + + client_options = {"api_endpoint": "foo.bar:1234"} + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + self._make_one(client_options=client_options) + bigtable_client_init.assert_called_once() + kwargs = bigtable_client_init.call_args[1] + called_options = kwargs["client_options"] + self.assertEqual(called_options.api_endpoint, "foo.bar:1234") + self.assertIsInstance(called_options, ClientOptions) + + async def test_veneer_grpc_headers(self): # client_info should be populated with headers to # detect as a veneer client @@ -146,7 +162,7 @@ async def test_channel_pool_rotation(self): with mock.patch.object(type(client.transport), "next_channel") as next_channel: with mock.patch.object( type(client.transport.channel_pool[0]), "unary_unary" - ) as unary_unary: + ): # calling an rpc `pool_size` times should use a different channel each time for i in range(pool_size): channel_1 = client.transport.channel_pool[ @@ -263,7 +279,7 @@ async def test__manage_channel_ping_and_warm(self): # should ping an warm all new channels, and old channels if sleeping client = self._make_one(project="project-id") new_channel = grpc.aio.insecure_channel("localhost:8080") - with mock.patch.object(asyncio, "sleep") as sleep: + with mock.patch.object(asyncio, "sleep"): with mock.patch.object( PooledBigtableGrpcAsyncIOTransport, "create_channel" ) as create_channel: @@ -339,8 +355,6 @@ async def test__manage_channel_sleeps(self): async def test__manage_channel_refresh(self): # make sure that channels are properly refreshed - from collections import namedtuple - import time from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) From 
e2d4bd566a282fd39a86d3bdd6e95a820b940aa8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 16:10:37 -0700 Subject: [PATCH 118/349] fixed type --- gapic-generator-fork | 2 +- .../bigtable/transports/pooled_grpc_asyncio.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 7e82e57be..1a5660273 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 7e82e57bee9a7071877772f77adb481a4d650750 +Subproject commit 1a56602733106b80142d944885fc37374d13f9ef diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 71388f3a6..834674108 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -16,7 +16,17 @@ import asyncio import warnings from functools import partialmethod -from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple, Union, List +from typing import ( + Awaitable, + Callable, + Dict, + Optional, + Sequence, + Tuple, + Union, + List, + Type, +) from google.api_core import gapic_v1 from google.api_core import grpc_helpers_async @@ -49,7 +59,7 @@ class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): """ @classmethod - def with_fixed_size(cls, pool_size) -> "PooledBigtableGrpcAsyncIOTransport": + def with_fixed_size(cls, pool_size) -> Type["PooledBigtableGrpcAsyncIOTransport"]: """ Creates a new class with a fixed channel pool size. From a91362f674736841ef54c08582f7084d5f034d64 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 16:23:55 -0700 Subject: [PATCH 119/349] reverted rest client --- .../services/bigtable/transports/rest.py | 126 +++++++++--------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index 6c786f6b3..4343fbb90 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -365,9 +365,6 @@ class BigtableRestTransport(BigtableTransport): It sends JSON representations of protocol buffers over HTTP/1.1 - NOTE: This REST transport functionality is currently in a beta - state (preview). We welcome your feedback via an issue in this - library's source repository. Thank you! """ def __init__( @@ -387,39 +384,35 @@ def __init__( ) -> None: """Instantiate the transport. - NOTE: This REST transport functionality is currently in a beta - state (preview). We welcome your feedback via a GitHub issue in - this library's repository. Thank you! - - Args: - host (Optional[str]): - The hostname to connect to. - credentials (Optional[google.auth.credentials.Credentials]): The - authorization credentials to attach to requests. These - credentials identify the application to the service; if none - are specified, the client will attempt to ascertain the - credentials from the environment. - - credentials_file (Optional[str]): A file with credentials that can - be loaded with :func:`google.auth.load_credentials_from_file`. - This argument is ignored if ``channel`` is provided. - scopes (Optional(Sequence[str])): A list of scopes. This argument is - ignored if ``channel`` is provided. 
- client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client - certificate to configure mutual TLS HTTP channel. It is ignored - if ``channel`` is provided. - quota_project_id (Optional[str]): An optional project to use for billing - and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you are developing - your own client library. - always_use_jwt_access (Optional[bool]): Whether self signed JWT should - be used for service account credentials. - url_scheme: the protocol scheme for the API endpoint. Normally - "https", but for testing or local servers, - "http" can be specified. + Args: + host (Optional[str]): + The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + client_cert_source_for_mtls (Callable[[], Tuple[bytes, bytes]]): Client + certificate to configure mutual TLS HTTP channel. It is ignored + if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you are developing + your own client library. + always_use_jwt_access (Optional[bool]): Whether self signed JWT should + be used for service account credentials. + url_scheme: the protocol scheme for the API endpoint. Normally + "https", but for testing or local servers, + "http" can be specified. """ # Run the base constructor # TODO(yon-mg): resolve other ctor params i.e. scopes, quota, etc. @@ -478,7 +471,6 @@ def __call__( request (~.bigtable.CheckAndMutateRowRequest): The request object. Request message for Bigtable.CheckAndMutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -510,7 +502,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -520,11 +512,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -580,7 +574,6 @@ def __call__( by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. 
timeout (float): The timeout for this request. @@ -619,7 +612,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -629,11 +622,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -687,7 +682,6 @@ def __call__( request (~.bigtable.MutateRowRequest): The request object. Request message for Bigtable.MutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -717,7 +711,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -727,11 +721,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -784,7 +780,6 @@ def __call__( request (~.bigtable.MutateRowsRequest): The request object. Request message for BigtableService.MutateRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -814,7 +809,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -824,11 +819,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -880,7 +877,6 @@ def __call__( request (~.bigtable.PingAndWarmRequest): The request object. Request message for client connection keep-alive and warming. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -911,7 +907,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -921,11 +917,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -979,7 +977,6 @@ def __call__( The request object. NOTE: This API is intended to be used by Apache Beam BigtableIO. Request message for Bigtable.ReadChangeStream. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1012,7 +1009,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1022,11 +1019,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1078,7 +1077,6 @@ def __call__( request (~.bigtable.ReadModifyWriteRowRequest): The request object. Request message for Bigtable.ReadModifyWriteRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1110,7 +1108,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1120,11 +1118,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1177,7 +1177,6 @@ def __call__( request (~.bigtable.ReadRowsRequest): The request object. Request message for Bigtable.ReadRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -1207,7 +1206,7 @@ def __call__( body = json_format.MessageToJson( transcoded_request["body"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) uri = transcoded_request["uri"] method = transcoded_request["method"] @@ -1217,11 +1216,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" @@ -1271,7 +1272,6 @@ def __call__( request (~.bigtable.SampleRowKeysRequest): The request object. Request message for Bigtable.SampleRowKeys. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1303,11 +1303,13 @@ def __call__( json_format.MessageToJson( transcoded_request["query_params"], including_default_value_fields=False, - use_integers_for_enums=False, + use_integers_for_enums=True, ) ) query_params.update(self._get_unset_required_fields(query_params)) + query_params["$alt"] = "json;enum-encoding=int" + # Send the request headers = dict(metadata) headers["Content-Type"] = "application/json" From d80a8c0059cb7484c3f0e920c10cc6e975eab04c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 16:38:43 -0700 Subject: [PATCH 120/349] fixed rest tests --- tests/unit/gapic/bigtable_v2/test_bigtable.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 5e227ff82..0337d2f08 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -3382,7 +3382,7 @@ def test_read_rows_rest_required_fields(request_type=bigtable.ReadRowsRequest): iter_content.return_value = iter(json_return_value) response = client.read_rows(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3668,7 +3668,7 @@ def test_sample_row_keys_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.sample_row_keys(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3940,7 +3940,7 @@ def test_mutate_row_rest_required_fields(request_type=bigtable.MutateRowRequest) response = client.mutate_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4235,7 +4235,7 @@ def test_mutate_rows_rest_required_fields(request_type=bigtable.MutateRowsReques iter_content.return_value = iter(json_return_value) response = client.mutate_rows(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4522,7 +4522,7 @@ def test_check_and_mutate_row_rest_required_fields( response = client.check_and_mutate_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == 
actual_params @@ -4840,7 +4840,7 @@ def test_ping_and_warm_rest_required_fields(request_type=bigtable.PingAndWarmReq response = client.ping_and_warm(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5106,7 +5106,7 @@ def test_read_modify_write_row_rest_required_fields( response = client.read_modify_write_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5405,7 +5405,7 @@ def test_generate_initial_change_stream_partitions_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.generate_initial_change_stream_partitions(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5704,7 +5704,7 @@ def test_read_change_stream_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.read_change_stream(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params From b888ee8fa98c07a8e9672707e419479c2bed052b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 28 Mar 2023 17:26:33 -0700 Subject: [PATCH 121/349] converted tests to pytest --- tests/unit/test_client.py | 946 +++++++++++++++++++------------------- 1 file changed, 476 insertions(+), 470 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index afc4464a5..211ec21be 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -19,6 +19,7 @@ import re from google.auth.credentials import AnonymousCredentials +import pytest # try/except added for compatibility with python < 3.8 try: @@ -32,490 +33,495 @@ ) -class TestBigtableDataClientAsync(unittest.IsolatedAsyncioTestCase): - @staticmethod - def _get_target_class(): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient - - def _make_one(self, *args, **kwargs): - return self._get_target_class()(*args, **kwargs) - - async def test_ctor(self): - expected_project = "project-id" - expected_pool_size = 11 - expected_metadata = [("a", "b")] - expected_credentials = AnonymousCredentials() - client = self._make_one( - project="project-id", - pool_size=expected_pool_size, - metadata=expected_metadata, - credentials=expected_credentials, - ) +def _get_target_class(): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient + +def _make_one(*args, **kwargs): + return _get_target_class()(*args, **kwargs) + +@pytest.mark.asyncio +async def test_ctor(): + expected_project = "project-id" + expected_pool_size = 11 + expected_metadata = [("a", "b")] + expected_credentials = AnonymousCredentials() + client = _make_one( + project="project-id", + pool_size=expected_pool_size, + metadata=expected_metadata, + credentials=expected_credentials, + ) + await asyncio.sleep(0.1) + assert client.project == expected_project + assert len(client.transport.channel_pool) == expected_pool_size + assert client.metadata == expected_metadata + assert not client._active_instances + assert len(client._channel_refresh_tasks) == expected_pool_size + assert client.transport._credentials == expected_credentials + +@pytest.mark.asyncio +async def 
test_ctor_super_inits(): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.cloud.client import _ClientProjectMixin + from google.api_core import client_options as client_options_lib + + project = "project-id" + pool_size = 11 + credentials = AnonymousCredentials() + client_options = {"api_endpoint": "foo.bar:1234"} + options_parsed = client_options_lib.from_dict(client_options) + metadata = [("a", "b")] + transport_str = f"pooled_grpc_asyncio_{pool_size}" + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + with mock.patch.object( + _ClientProjectMixin, "__init__" + ) as client_project_mixin_init: + try: + _make_one( + project=project, + pool_size=pool_size, + credentials=credentials, + client_options=options_parsed, + metadata=metadata, + ) + except AttributeError: + pass + # test gapic superclass init was called + assert bigtable_client_init.call_count == 1 + kwargs = bigtable_client_init.call_args[1] + assert kwargs["transport"] == transport_str + assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed + # test mixin superclass init was called + assert client_project_mixin_init.call_count == 1 + kwargs = client_project_mixin_init.call_args[1] + assert kwargs["project"] == project + assert kwargs["credentials"] == credentials + +@pytest.mark.asyncio +async def test_ctor_dict_options(): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.api_core.client_options import ClientOptions + + client_options = {"api_endpoint": "foo.bar:1234"} + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + _make_one(client_options=client_options) + bigtable_client_init.assert_called_once() + kwargs = bigtable_client_init.call_args[1] + called_options = kwargs["client_options"] + assert called_options.api_endpoint == "foo.bar:1234" + assert isinstance(called_options, ClientOptions) + + +@pytest.mark.asyncio +async def test_veneer_grpc_headers(): + # client_info should be populated with headers to + # detect as a veneer client + patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") + with patch as gapic_mock: + _make_one(project="project-id") + wrapped_call_list = gapic_mock.call_args_list + assert len(wrapped_call_list) > 0 + # each wrapped call should have veneer headers + for call in wrapped_call_list: + client_info = call.kwargs["client_info"] + assert client_info is not None, f"{call} has no client_info" + wrapped_user_agent_sorted = " ".join( + sorted(client_info.to_user_agent().split(" ")) + ) + assert VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), \ + f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" + +@pytest.mark.asyncio +async def test_channel_pool_creation(): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + + pool_size = 14 + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: + client = _make_one(project="project-id", pool_size=pool_size) + assert create_channel.call_count == pool_size + # channels should be unique + client = _make_one(project="project-id", pool_size=pool_size) + pool_list = list(client.transport.channel_pool) + pool_set = set(client.transport.channel_pool) + assert len(pool_list) == len(pool_set) + +@pytest.mark.asyncio +async def test_channel_pool_rotation(): + pool_size = 7 + client 
= _make_one(project="project-id", pool_size=pool_size) + assert len(client.transport.channel_pool) == pool_size + + with mock.patch.object(type(client.transport), "next_channel") as next_channel: + with mock.patch.object( + type(client.transport.channel_pool[0]), "unary_unary" + ): + # calling an rpc `pool_size` times should use a different channel each time + for i in range(pool_size): + channel_1 = client.transport.channel_pool[ + client.transport._next_idx + ] + next_channel.return_value = channel_1 + client.transport.ping_and_warm() + assert next_channel.call_count == i + 1 + channel_1.unary_unary.assert_called_once() + +@pytest.mark.asyncio +async def test_channel_pool_replace(): + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + for replace_idx in range(pool_size): + start_pool = [channel for channel in client.transport.channel_pool] + grace_period = 9 + with mock.patch.object( + type(client.transport.channel_pool[0]), "close" + ) as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) + close.assert_called_once_with(grace=grace_period) + close.assert_awaited_once() + assert client.transport.channel_pool[replace_idx] == new_channel + for i in range(pool_size): + if i != replace_idx: + assert client.transport.channel_pool[i] == start_pool[i] + else: + assert client.transport.channel_pool[i] != start_pool[i] + +@pytest.mark.asyncio +async def test_ctor_background_channel_refresh(): + # should create background tasks for each channel + for pool_size in [1, 3, 7]: + client = _make_one(project="project-id", pool_size=pool_size) + ping_and_warm = AsyncMock() + client._ping_and_warm_instances = ping_and_warm + assert len(client._channel_refresh_tasks) == pool_size + for task in client._channel_refresh_tasks: + assert isinstance(task, asyncio.Task) await asyncio.sleep(0.1) - self.assertEqual(client.project, expected_project) - self.assertEqual(len(client.transport.channel_pool), expected_pool_size) - self.assertEqual(client.metadata, expected_metadata) - self.assertFalse(client._active_instances) - self.assertEqual(len(client._channel_refresh_tasks), expected_pool_size) - self.assertEqual(client.transport._credentials, expected_credentials) - - async def test_ctor_super_inits(self): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.cloud.client import _ClientProjectMixin - from google.api_core import client_options as client_options_lib - - project = "project-id" - pool_size = 11 - credentials = AnonymousCredentials() - client_options = {"api_endpoint": "foo.bar:1234"} - options_parsed = client_options_lib.from_dict(client_options) - metadata = [("a", "b")] - transport_str = f"pooled_grpc_asyncio_{pool_size}" - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - with mock.patch.object( - _ClientProjectMixin, "__init__" - ) as client_project_mixin_init: + assert ping_and_warm.call_count == pool_size + for channel in client.transport.channel_pool: + ping_and_warm.assert_any_call(channel) + +@pytest.mark.asyncio +async def test__ping_and_warm_instances(): + # test with no instances + gather = AsyncMock() + asyncio.gather = gather + client = _make_one(project="project-id", pool_size=1) + channel = client.transport.channel_pool[0] + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert not 
gather.call_args.args + assert gather.call_args.kwargs == {"return_exceptions": True} + # test with instances + client._active_instances = [ + "instance-1", + "instance-2", + "instance-3", + "instance-4", + ] + gather = AsyncMock() + asyncio.gather = gather + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert len(gather.call_args.args) == 4 + assert gather.call_args.kwargs == {"return_exceptions": True} + for idx, call in enumerate(gather.call_args.args): + assert isinstance(call, grpc.aio.UnaryUnaryCall) + call._request["name"] = client._active_instances[idx] + +@pytest.mark.asyncio +async def test__manage_channel_first_sleep(): + # first sleep time should be `refresh_interval` seconds after client init + import time + from collections import namedtuple + + params = namedtuple( + "params", ["refresh_interval", "wait_time", "expected_sleep"] + ) + test_params = [ + params(refresh_interval=0, wait_time=0, expected_sleep=0), + params(refresh_interval=0, wait_time=1, expected_sleep=0), + params(refresh_interval=10, wait_time=0, expected_sleep=10), + params(refresh_interval=10, wait_time=5, expected_sleep=5), + params(refresh_interval=10, wait_time=10, expected_sleep=0), + params(refresh_interval=10, wait_time=15, expected_sleep=0), + ] + with mock.patch.object(time, "time") as time: + time.return_value = 0 + for refresh_interval, wait_time, expected_sleep in test_params: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = asyncio.CancelledError try: - self._make_one( - project=project, - pool_size=pool_size, - credentials=credentials, - client_options=options_parsed, - metadata=metadata, - ) - except AttributeError: + client = _make_one(project="project-id") + client._channel_init_time = -wait_time + await client._manage_channel(0, refresh_interval) + except asyncio.CancelledError: pass - # test gapic superclass init was called - self.assertEqual(bigtable_client_init.call_count, 1) - kwargs = bigtable_client_init.call_args[1] - self.assertEqual(kwargs["transport"], transport_str) - self.assertEqual(kwargs["credentials"], credentials) - self.assertEqual(kwargs["client_options"], options_parsed) - # test mixin superclass init was called - self.assertEqual(client_project_mixin_init.call_count, 1) - kwargs = client_project_mixin_init.call_args[1] - self.assertEqual(kwargs["project"], project) - self.assertEqual(kwargs["credentials"], credentials) - - async def test_ctor_dict_options(self): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.api_core.client_options import ClientOptions - - client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - self._make_one(client_options=client_options) - bigtable_client_init.assert_called_once() - kwargs = bigtable_client_init.call_args[1] - called_options = kwargs["client_options"] - self.assertEqual(called_options.api_endpoint, "foo.bar:1234") - self.assertIsInstance(called_options, ClientOptions) - - - async def test_veneer_grpc_headers(self): - # client_info should be populated with headers to - # detect as a veneer client - patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") - with patch as gapic_mock: - self._make_one(project="project-id") - wrapped_call_list = gapic_mock.call_args_list - self.assertGreater(len(wrapped_call_list), 0) - # each wrapped call should have veneer headers - for call in wrapped_call_list: - 
client_info = call.kwargs["client_info"] - self.assertIsNotNone(client_info, f"{call} has no client_info") - wrapped_user_agent_sorted = " ".join( - sorted(client_info.to_user_agent().split(" ")) - ) - self.assertTrue( - VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), - f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}", - ) - - async def test_channel_pool_creation(self): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - - pool_size = 14 + sleep.assert_called_once() + call_time = sleep.call_args[0][0] + assert abs(call_time-expected_sleep) < 0.1, f"params={params}" + +@pytest.mark.asyncio +async def test__manage_channel_ping_and_warm(): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + + # should ping an warm all new channels, and old channels if sleeping + client = _make_one(project="project-id") + new_channel = grpc.aio.insecure_channel("localhost:8080") + with mock.patch.object(asyncio, "sleep"): with mock.patch.object( PooledBigtableGrpcAsyncIOTransport, "create_channel" ) as create_channel: - client = self._make_one(project="project-id", pool_size=pool_size) - self.assertEqual(create_channel.call_count, pool_size) - # channels should be unique - client = self._make_one(project="project-id", pool_size=pool_size) - pool_list = list(client.transport.channel_pool) - pool_set = set(client.transport.channel_pool) - self.assertEqual(len(pool_list), len(pool_set)) - - async def test_channel_pool_rotation(self): - pool_size = 7 - client = self._make_one(project="project-id", pool_size=pool_size) - self.assertEqual(len(client.transport.channel_pool), pool_size) - - with mock.patch.object(type(client.transport), "next_channel") as next_channel: + create_channel.return_value = new_channel with mock.patch.object( - type(client.transport.channel_pool[0]), "unary_unary" - ): - # calling an rpc `pool_size` times should use a different channel each time - for i in range(pool_size): - channel_1 = client.transport.channel_pool[ - client.transport._next_idx - ] - next_channel.return_value = channel_1 - client.transport.ping_and_warm() - self.assertEqual(next_channel.call_count, i + 1) - channel_1.unary_unary.assert_called_once() - - async def test_channel_pool_replace(self): - pool_size = 7 - client = self._make_one(project="project-id", pool_size=pool_size) - for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport.channel_pool] - grace_period = 9 - with mock.patch.object( - type(client.transport.channel_pool[0]), "close" - ) as close: - new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel( - replace_idx, grace=grace_period, new_channel=new_channel - ) - close.assert_called_once_with(grace=grace_period) - close.assert_awaited_once() - self.assertEqual(client.transport.channel_pool[replace_idx], new_channel) - for i in range(pool_size): - if i != replace_idx: - self.assertEqual(client.transport.channel_pool[i], start_pool[i]) - else: - self.assertNotEqual(client.transport.channel_pool[i], start_pool[i]) - - async def test_ctor_background_channel_refresh(self): - # should create background tasks for each channel - for pool_size in [1, 3, 7]: - client = self._make_one(project="project-id", pool_size=pool_size) - ping_and_warm = AsyncMock() - client._ping_and_warm_instances = ping_and_warm - 
self.assertEqual(len(client._channel_refresh_tasks), pool_size) - for task in client._channel_refresh_tasks: - self.assertIsInstance(task, asyncio.Task) - await asyncio.sleep(0.1) - self.assertEqual(ping_and_warm.call_count, pool_size) - for channel in client.transport.channel_pool: - ping_and_warm.assert_any_call(channel) - - async def test__ping_and_warm_instances(self): - # test with no instances - gather = AsyncMock() - asyncio.gather = gather - client = self._make_one(project="project-id", pool_size=1) - channel = client.transport.channel_pool[0] - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - self.assertFalse(gather.call_args.args) - self.assertEqual(gather.call_args.kwargs, {"return_exceptions": True}) - # test with instances - client._active_instances = [ - "instance-1", - "instance-2", - "instance-3", - "instance-4", - ] - gather = AsyncMock() - asyncio.gather = gather - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - self.assertEqual(len(gather.call_args.args), 4) - self.assertEqual(gather.call_args.kwargs, {"return_exceptions": True}) - for idx, call in enumerate(gather.call_args.args): - self.assertIsInstance(call, grpc.aio.UnaryUnaryCall) - call._request["name"] = client._active_instances[idx] - - async def test__manage_channel_first_sleep(self): - # first sleep time should be `refresh_interval` seconds after client init - import time - from collections import namedtuple - - params = namedtuple( - "params", ["refresh_interval", "wait_time", "expected_sleep"] - ) - test_params = [ - params(refresh_interval=0, wait_time=0, expected_sleep=0), - params(refresh_interval=0, wait_time=1, expected_sleep=0), - params(refresh_interval=10, wait_time=0, expected_sleep=10), - params(refresh_interval=10, wait_time=5, expected_sleep=5), - params(refresh_interval=10, wait_time=10, expected_sleep=0), - params(refresh_interval=10, wait_time=15, expected_sleep=0), - ] - with mock.patch.object(time, "time") as time: - time.return_value = 0 - for refresh_interval, wait_time, expected_sleep in test_params: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = asyncio.CancelledError + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + replace_channel.side_effect = asyncio.CancelledError + # should ping and warm old channel then new if sleep > 0 + with mock.patch.object( + type(_make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: try: - client = self._make_one(project="project-id") - client._channel_init_time = -wait_time - await client._manage_channel(0, refresh_interval) + channel_idx = 2 + old_channel = client.transport.channel_pool[channel_idx] + await client._manage_channel(channel_idx, 10) except asyncio.CancelledError: pass - sleep.assert_called_once() - call_time = sleep.call_args[0][0] - self.assertAlmostEqual( - call_time, expected_sleep, delta=0.1, msg=f"params={params}" - ) - - async def test__manage_channel_ping_and_warm(self): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - - # should ping an warm all new channels, and old channels if sleeping - client = self._make_one(project="project-id") - new_channel = grpc.aio.insecure_channel("localhost:8080") - with mock.patch.object(asyncio, "sleep"): - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: - 
create_channel.return_value = new_channel - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - replace_channel.side_effect = asyncio.CancelledError - # should ping and warm old channel then new if sleep > 0 - with mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - channel_idx = 2 - old_channel = client.transport.channel_pool[channel_idx] - await client._manage_channel(channel_idx, 10) - except asyncio.CancelledError: - pass - self.assertEqual(ping_and_warm.call_count, 2) - self.assertNotEqual(old_channel, new_channel) - called_with = [ - call[0][0] for call in ping_and_warm.call_args_list - ] - self.assertIn(old_channel, called_with) - self.assertIn(new_channel, called_with) - # should ping and warm instantly new channel only if not sleeping - with mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - await client._manage_channel(0, 0) - except asyncio.CancelledError: - pass - ping_and_warm.assert_called_once_with(new_channel) - - async def test__manage_channel_sleeps(self): - # make sure that sleeps work as expected - from collections import namedtuple - import time - - params = namedtuple( - "params", ["refresh_interval", "num_cycles", "expected_sleep"] - ) - test_params = [ - params(refresh_interval=None, num_cycles=1, expected_sleep=60 * 45), - params(refresh_interval=10, num_cycles=10, expected_sleep=100), - params(refresh_interval=10, num_cycles=1, expected_sleep=10), - ] - channel_idx = 1 - with mock.patch.object(time, "time") as time: - time.return_value = 0 - for refresh_interval, num_cycles, expected_sleep in test_params: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError + assert ping_and_warm.call_count == 2 + assert old_channel != new_channel + called_with = [ + call[0][0] for call in ping_and_warm.call_args_list ] + assert old_channel in called_with + assert new_channel in called_with + # should ping and warm instantly new channel only if not sleeping + with mock.patch.object( + type(_make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: try: - client = self._make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel(channel_idx, refresh_interval) - else: - await client._manage_channel(channel_idx) + await client._manage_channel(0, 0) except asyncio.CancelledError: pass - self.assertEqual(sleep.call_count, num_cycles) - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - self.assertAlmostEqual( - total_sleep, - expected_sleep, - delta=0.1, - msg=f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}", - ) - - async def test__manage_channel_refresh(self): - # make sure that channels are properly refreshed - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - - expected_grace = 9 - expected_refresh = 0.5 - channel_idx = 1 - new_channel = grpc.aio.insecure_channel("localhost:8080") - - for num_cycles in [0, 1, 10, 100]: - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles)] + [ - asyncio.CancelledError - ] - client = self._make_one(project="project-id") - with mock.patch.object( - 
PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: - create_channel.return_value = new_channel - try: - await client._manage_channel( - channel_idx, - refresh_interval=expected_refresh, - grace_period=expected_grace, - ) - except asyncio.CancelledError: - pass - self.assertEqual(sleep.call_count, num_cycles + 1) - self.assertEqual(create_channel.call_count, num_cycles) - self.assertEqual(replace_channel.call_count, num_cycles) - for call in replace_channel.call_args_list: - self.assertEqual(call[0][0], channel_idx) - self.assertEqual(call[0][1], expected_grace) - self.assertEqual(call[0][2], new_channel) - - async def test_register_instance_ping_and_warm(self): - # should ping and warm each new instance - pool_size = 7 - client = self._make_one(project="project-id", pool_size=pool_size) - self.assertEqual(len(client._channel_refresh_tasks), pool_size) - self.assertFalse(client._active_instances) - # next calls should trigger ping and warm + ping_and_warm.assert_called_once_with(new_channel) + +@pytest.mark.asyncio +async def test__manage_channel_sleeps(): + # make sure that sleeps work as expected + from collections import namedtuple + import time + + params = namedtuple( + "params", ["refresh_interval", "num_cycles", "expected_sleep"] + ) + test_params = [ + params(refresh_interval=None, num_cycles=1, expected_sleep=60 * 45), + params(refresh_interval=10, num_cycles=10, expected_sleep=100), + params(refresh_interval=10, num_cycles=1, expected_sleep=10), + ] + channel_idx = 1 + with mock.patch.object(time, "time") as time: + time.return_value = 0 + for refresh_interval, num_cycles, expected_sleep in test_params: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] + try: + client = _make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel(channel_idx, refresh_interval) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert abs(total_sleep-expected_sleep) < \ + 0.1, \ + f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + +@pytest.mark.asyncio +async def test__manage_channel_refresh(): + # make sure that channels are properly refreshed + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + + expected_grace = 9 + expected_refresh = 0.5 + channel_idx = 1 + new_channel = grpc.aio.insecure_channel("localhost:8080") + + for num_cycles in [0, 1, 10, 100]: with mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_mock: - # new instance should trigger ping and warm - await client.register_instance("instance-1") - self.assertEqual(ping_mock.call_count, pool_size) - await client.register_instance("instance-2") - self.assertEqual(ping_mock.call_count, pool_size * 2) - # duplcate instances should not trigger ping and warm - await client.register_instance("instance-2") - self.assertEqual(ping_mock.call_count, pool_size * 2) - - async def test_remove_instance_registration(self): - client = self._make_one(project="project-id") + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles)] + [ + asyncio.CancelledError + ] + 
client = _make_one(project="project-id") + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: + create_channel.return_value = new_channel + try: + await client._manage_channel( + channel_idx, + refresh_interval=expected_refresh, + grace_period=expected_grace, + ) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + 1 + assert create_channel.call_count == num_cycles + assert replace_channel.call_count == num_cycles + for call in replace_channel.call_args_list: + assert call[0][0] == channel_idx + assert call[0][1] == expected_grace + assert call[0][2] == new_channel + +@pytest.mark.asyncio +async def test_register_instance_ping_and_warm(): + # should ping and warm each new instance + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + assert not client._active_instances + # next calls should trigger ping and warm + with mock.patch.object( + type(_make_one()), "_ping_and_warm_instances" + ) as ping_mock: + # new instance should trigger ping and warm await client.register_instance("instance-1") + assert ping_mock.call_count == pool_size + await client.register_instance("instance-2") + assert ping_mock.call_count == pool_size * 2 + # duplcate instances should not trigger ping and warm await client.register_instance("instance-2") - self.assertEqual(len(client._active_instances), 2) - success = await client.remove_instance_registration("instance-1") - self.assertTrue(success) - self.assertEqual(len(client._active_instances), 1) - self.assertEqual( - client._active_instances, {"projects/project-id/instances/instance-2"} - ) - success = await client.remove_instance_registration("nonexistant") - self.assertFalse(success) - self.assertEqual(len(client._active_instances), 1) - - async def test_get_table(self): - from google.cloud.bigtable.client import Table - - client = self._make_one(project="project-id") - self.assertFalse(client._active_instances) - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - expected_metadata = [("a", "b")] - table = client.get_table( - expected_instance_id, - expected_table_id, - expected_app_profile_id, - expected_metadata, - ) - await asyncio.sleep(0) - self.assertIsInstance(table, Table) - self.assertEqual(table.table_id, expected_table_id) - self.assertEqual(table.instance, expected_instance_id) - self.assertEqual(table.app_profile_id, expected_app_profile_id) - self.assertEqual(table.metadata, expected_metadata) - self.assertIs(table.client, client) - full_instance_name = client.instance_path(client.project, expected_instance_id) - self.assertIn(full_instance_name, client._active_instances) - - async def test_multiple_pool_sizes(self): - # should be able to create multiple clients with different pool sizes without issue - pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] - for pool_size in pool_sizes: - client = self._make_one(project="project-id", pool_size=pool_size) - self.assertEqual(len(client._channel_refresh_tasks), pool_size) - client_duplicate = self._make_one(project="project-id", pool_size=pool_size) - self.assertEqual(len(client_duplicate._channel_refresh_tasks), pool_size) - self.assertIn(str(pool_size), str(client.transport)) - - -class TestSyncInitialization(unittest.TestCase): - def test_client_ctor_sync(self): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import 
BigtableDataClient - - with self.assertRaises(RuntimeError) as err: - BigtableDataClient(project="project-id") - self.assertIn("no current event loop", str(err.exception)) - - def test_table_ctor_sync(self): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import Table - - client = mock.Mock() - with self.assertRaises(RuntimeError) as err: - Table(client, "instance-id", "table-id") - self.assertEqual(str(err.exception), "no running event loop") - - -class TestTable(unittest.IsolatedAsyncioTestCase): - def _make_one(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient().get_table(*args, **kwargs) - - async def test_ctor(self): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table - - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - expected_metadata = [("a", "b")] - client = BigtableDataClient() - self.assertFalse(client._active_instances) - - table = Table( - client, - expected_instance_id, - expected_table_id, - expected_app_profile_id, - expected_metadata, - ) - await asyncio.sleep(0) - self.assertEqual(table.table_id, expected_table_id) - self.assertEqual(table.instance, expected_instance_id) - self.assertEqual(table.app_profile_id, expected_app_profile_id) - self.assertEqual(table.metadata, expected_metadata) - self.assertIs(table.client, client) - full_instance_name = client.instance_path(client.project, expected_instance_id) - self.assertIn(full_instance_name, client._active_instances) - # ensure task reaches completion - await table._register_instance_task - self.assertTrue(table._register_instance_task.done()) - self.assertFalse(table._register_instance_task.cancelled()) - self.assertIsNone(table._register_instance_task.exception()) + assert ping_mock.call_count == pool_size * 2 + +@pytest.mark.asyncio +async def test_remove_instance_registration(): + client = _make_one(project="project-id") + await client.register_instance("instance-1") + await client.register_instance("instance-2") + assert len(client._active_instances) == 2 + success = await client.remove_instance_registration("instance-1") + assert success + assert len(client._active_instances) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-2"} + success = await client.remove_instance_registration("nonexistant") + assert not success + assert len(client._active_instances) == 1 + +@pytest.mark.asyncio +async def test_get_table(): + from google.cloud.bigtable.client import Table + + client = _make_one(project="project-id") + assert not client._active_instances + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_metadata = [("a", "b")] + table = client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + expected_metadata, + ) + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert table.instance == expected_instance_id + assert table.app_profile_id == expected_app_profile_id + assert table.metadata == expected_metadata + assert table.client is client + full_instance_name = client.instance_path(client.project, expected_instance_id) + assert full_instance_name in client._active_instances + +@pytest.mark.asyncio +async def test_multiple_pool_sizes(): + # should be able to create multiple clients 
with different pool sizes without issue + pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] + for pool_size in pool_sizes: + client = _make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + client_duplicate = _make_one(project="project-id", pool_size=pool_size) + assert len(client_duplicate._channel_refresh_tasks) == pool_size + assert str(pool_size) in str(client.transport) + +def test_client_ctor_sync(): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import BigtableDataClient + + with pytest.raises(RuntimeError) as err: + BigtableDataClient(project="project-id") + assert "event loop" in str(err.value) + + +###################################################################### +# Table Tests +###################################################################### + +@pytest.mark.asyncio +async def test_table_ctor(): + from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_metadata = [("a", "b")] + client = BigtableDataClient() + assert not client._active_instances + + table = Table( + client, + expected_instance_id, + expected_table_id, + expected_app_profile_id, + expected_metadata, + ) + await asyncio.sleep(0) + assert table.table_id == expected_table_id + assert table.instance == expected_instance_id + assert table.app_profile_id == expected_app_profile_id + assert table.metadata == expected_metadata + assert table.client is client + full_instance_name = client.instance_path(client.project, expected_instance_id) + assert full_instance_name in client._active_instances + # ensure task reaches completion + await table._register_instance_task + assert table._register_instance_task.done() + assert not table._register_instance_task.cancelled() + assert table._register_instance_task.exception() is None + +def test_table_ctor_sync(): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import Table + + client = mock.Mock() + with pytest.raises(RuntimeError) as err: + Table(client, "instance-id", "table-id") + assert "event loop" in str(err.value) + + From 94c11877f3b031f310a7739118357b66bf9c2c57 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 14:19:02 -0700 Subject: [PATCH 122/349] added client closure --- google/cloud/bigtable/client.py | 38 +++++++++++++- tests/unit/test_client.py | 91 ++++++++++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 73cb9ad8f..a7b7a8f78 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -20,6 +20,7 @@ import asyncio import grpc import time +import warnings from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -82,6 +83,11 @@ def __init__( - RuntimeError if called outside of an async run loop context - ValueError if pool_size is less than 1 """ + # check if in async run loop context + if not asyncio.get_running_loop(): + raise RuntimeError( + "BigtableDataClient must be created within an async run loop context" + ) # set up transport in registry transport_str = f"pooled_grpc_asyncio_{pool_size}" transport = 
PooledBigtableGrpcAsyncIOTransport.with_fixed_size(pool_size) @@ -112,9 +118,39 @@ def __init__( self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] for channel_idx in range(pool_size): - refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) + refresh_task = asyncio.create_task(self._manage_channel(channel_idx), name=f"channel_refresh_{channel_idx}") self._channel_refresh_tasks.append(refresh_task) + def __del__(self): + """ + Clean up background tasks + """ + if hasattr(self, "_channel_refresh_tasks") and self._channel_refresh_tasks: + warnings.warn( + "BigtableDataClient instance is being garbage collected without " + "being closed. Please call the close() method to ensure all " + "background tasks are cancelled." + ) + for task in self._channel_refresh_tasks: + task.cancel() + + async def close(self, timeout: float = 2.0): + """ + Cancel all background tasks + """ + for task in self._channel_refresh_tasks: + task.cancel() + group = asyncio.gather(*self._channel_refresh_tasks, return_exceptions=True) + await asyncio.wait_for(group, timeout=timeout) + await self.transport.close() + self._channel_refresh_tasks = [] + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """ + Cleanly close context manager on exit + """ + await self.close() + async def _ping_and_warm_instances( self, channel: grpc.aio.Channel ) -> list[GoogleAPICallError | None]: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 211ec21be..0cd6650ba 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,6 +60,7 @@ async def test_ctor(): assert not client._active_instances assert len(client._channel_refresh_tasks) == expected_pool_size assert client.transport._credentials == expected_credentials + await client.close() @pytest.mark.asyncio async def test_ctor_super_inits(): @@ -125,7 +126,7 @@ async def test_veneer_grpc_headers(): # detect as a veneer client patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") with patch as gapic_mock: - _make_one(project="project-id") + client = _make_one(project="project-id") wrapped_call_list = gapic_mock.call_args_list assert len(wrapped_call_list) > 0 # each wrapped call should have veneer headers @@ -137,6 +138,7 @@ async def test_veneer_grpc_headers(): ) assert VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), \ f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" + await client.close() @pytest.mark.asyncio async def test_channel_pool_creation(): @@ -155,6 +157,7 @@ async def test_channel_pool_creation(): pool_list = list(client.transport.channel_pool) pool_set = set(client.transport.channel_pool) assert len(pool_list) == len(pool_set) + await client.close() @pytest.mark.asyncio async def test_channel_pool_rotation(): @@ -175,6 +178,7 @@ async def test_channel_pool_rotation(): client.transport.ping_and_warm() assert next_channel.call_count == i + 1 channel_1.unary_unary.assert_called_once() + await client.close() @pytest.mark.asyncio async def test_channel_pool_replace(): @@ -198,6 +202,7 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] == start_pool[i] else: assert client.transport.channel_pool[i] != start_pool[i] + await client.close() @pytest.mark.asyncio async def test_ctor_background_channel_refresh(): @@ -213,6 +218,7 @@ async def test_ctor_background_channel_refresh(): assert ping_and_warm.call_count == pool_size for channel in client.transport.channel_pool: ping_and_warm.assert_any_call(channel) + await 
client.close() @pytest.mark.asyncio async def test__ping_and_warm_instances(): @@ -243,6 +249,7 @@ async def test__ping_and_warm_instances(): for idx, call in enumerate(gather.call_args.args): assert isinstance(call, grpc.aio.UnaryUnaryCall) call._request["name"] = client._active_instances[idx] + await client.close() @pytest.mark.asyncio async def test__manage_channel_first_sleep(): @@ -275,6 +282,7 @@ async def test__manage_channel_first_sleep(): sleep.assert_called_once() call_time = sleep.call_args[0][0] assert abs(call_time-expected_sleep) < 0.1, f"params={params}" + await client.close() @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(): @@ -320,6 +328,7 @@ async def test__manage_channel_ping_and_warm(): except asyncio.CancelledError: pass ping_and_warm.assert_called_once_with(new_channel) + await client.close() @pytest.mark.asyncio async def test__manage_channel_sleeps(): @@ -356,6 +365,7 @@ async def test__manage_channel_sleeps(): assert abs(total_sleep-expected_sleep) < \ 0.1, \ f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + await client.close() @pytest.mark.asyncio async def test__manage_channel_refresh(): @@ -397,6 +407,7 @@ async def test__manage_channel_refresh(): assert call[0][0] == channel_idx assert call[0][1] == expected_grace assert call[0][2] == new_channel + await client.close() @pytest.mark.asyncio async def test_register_instance_ping_and_warm(): @@ -417,6 +428,7 @@ async def test_register_instance_ping_and_warm(): # duplcate instances should not trigger ping and warm await client.register_instance("instance-2") assert ping_mock.call_count == pool_size * 2 + await client.close() @pytest.mark.asyncio async def test_remove_instance_registration(): @@ -431,6 +443,7 @@ async def test_remove_instance_registration(): success = await client.remove_instance_registration("nonexistant") assert not success assert len(client._active_instances) == 1 + await client.close() @pytest.mark.asyncio async def test_get_table(): @@ -457,6 +470,7 @@ async def test_get_table(): assert table.client is client full_instance_name = client.instance_path(client.project, expected_instance_id) assert full_instance_name in client._active_instances + await client.close() @pytest.mark.asyncio async def test_multiple_pool_sizes(): @@ -468,6 +482,80 @@ async def test_multiple_pool_sizes(): client_duplicate = _make_one(project="project-id", pool_size=pool_size) assert len(client_duplicate._channel_refresh_tasks) == pool_size assert str(pool_size) in str(client.transport) + await client.close() + await client_duplicate.close() + +@pytest.mark.asyncio +async def test_close(): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import PooledBigtableGrpcAsyncIOTransport + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + tasks_list = list(client._channel_refresh_tasks) + for task in client._channel_refresh_tasks: + assert not task.done() + with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock()) as close_mock: + await client.close() + close_mock.assert_called_once() + close_mock.assert_awaited() + for task in tasks_list: + assert task.done() + assert task.cancelled() + assert client._channel_refresh_tasks == [] + +@pytest.mark.asyncio +async def test_close_with_timeout(): + pool_size = 7 + expected_timeout = 19 + client = _make_one(project="project-id", pool_size=pool_size) + with 
mock.patch.object(asyncio, "wait_for") as wait_for_mock: + await client.close(timeout=expected_timeout) + wait_for_mock.assert_called_once() + wait_for_mock.assert_awaited() + assert wait_for_mock.call_args[1]["timeout"] == expected_timeout + +@pytest.mark.asyncio +async def test___del__(): + # no warnings on __del__ after close + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + await client.close() + +@pytest.mark.asyncio +@pytest.mark.filterwarnings("ignore::UserWarning") +async def test___del____no_close(): + import warnings + # if client is garbage collected before being closed, it should raise a warning + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + # replace tasks with mocks + await client.close() + client._channel_refresh_tasks = [mock.Mock() for i in range(pool_size)] + assert len(client._channel_refresh_tasks) == pool_size + with pytest.warns(UserWarning) as warnings: + client.__del__() + assert len(warnings) == 1 + assert "Please call the close() method" in str(warnings[0].message) + for i in range(pool_size): + assert client._channel_refresh_tasks[i].cancel.call_count == 1 + +@pytest.mark.asyncio +async def test_context_manager(): + # context manager should close the client cleanly + close_mock = AsyncMock() + true_close = None + async with _make_one(project="project-id") as client: + true_close = client.close() + client.close = close_mock + for task in client._channel_refresh_tasks: + assert not task.done() + assert client.project == "project-id" + assert client._active_instances == set() + close_mock.assert_not_called() + close_mock.assert_called_once() + close_mock.assert_awaited() + # actually close the client + await true_close def test_client_ctor_sync(): # initializing client in a sync context should raise RuntimeError @@ -514,6 +602,7 @@ async def test_table_ctor(): assert table._register_instance_task.done() assert not table._register_instance_task.cancelled() assert table._register_instance_task.exception() is None + await client.close() def test_table_ctor_sync(): # initializing client in a sync context should raise RuntimeError From 4c02e6c310deaf1ddf0bdd7b3245e92674e198b2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 14:21:45 -0700 Subject: [PATCH 123/349] ran blacken --- google/cloud/bigtable/client.py | 4 +- tests/unit/test_client.py | 74 ++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a7b7a8f78..e8b6e9ff0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -118,7 +118,9 @@ def __init__( self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] for channel_idx in range(pool_size): - refresh_task = asyncio.create_task(self._manage_channel(channel_idx), name=f"channel_refresh_{channel_idx}") + refresh_task = asyncio.create_task( + self._manage_channel(channel_idx), name=f"channel_refresh_{channel_idx}" + ) self._channel_refresh_tasks.append(refresh_task) def __del__(self): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0cd6650ba..f157f6ee6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -38,9 +38,11 @@ def _get_target_class(): return BigtableDataClient + def _make_one(*args, **kwargs): return _get_target_class()(*args, **kwargs) + @pytest.mark.asyncio async def test_ctor(): expected_project = "project-id" @@ -62,6 +64,7 @@ async def 
test_ctor(): assert client.transport._credentials == expected_credentials await client.close() + @pytest.mark.asyncio async def test_ctor_super_inits(): from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -103,6 +106,7 @@ async def test_ctor_super_inits(): assert kwargs["project"] == project assert kwargs["credentials"] == credentials + @pytest.mark.asyncio async def test_ctor_dict_options(): from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -136,10 +140,12 @@ async def test_veneer_grpc_headers(): wrapped_user_agent_sorted = " ".join( sorted(client_info.to_user_agent().split(" ")) ) - assert VENEER_HEADER_REGEX.match(wrapped_user_agent_sorted), \ - f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" + assert VENEER_HEADER_REGEX.match( + wrapped_user_agent_sorted + ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" await client.close() + @pytest.mark.asyncio async def test_channel_pool_creation(): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -159,6 +165,7 @@ async def test_channel_pool_creation(): assert len(pool_list) == len(pool_set) await client.close() + @pytest.mark.asyncio async def test_channel_pool_rotation(): pool_size = 7 @@ -166,20 +173,17 @@ async def test_channel_pool_rotation(): assert len(client.transport.channel_pool) == pool_size with mock.patch.object(type(client.transport), "next_channel") as next_channel: - with mock.patch.object( - type(client.transport.channel_pool[0]), "unary_unary" - ): + with mock.patch.object(type(client.transport.channel_pool[0]), "unary_unary"): # calling an rpc `pool_size` times should use a different channel each time for i in range(pool_size): - channel_1 = client.transport.channel_pool[ - client.transport._next_idx - ] + channel_1 = client.transport.channel_pool[client.transport._next_idx] next_channel.return_value = channel_1 client.transport.ping_and_warm() assert next_channel.call_count == i + 1 channel_1.unary_unary.assert_called_once() await client.close() + @pytest.mark.asyncio async def test_channel_pool_replace(): pool_size = 7 @@ -204,6 +208,7 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] != start_pool[i] await client.close() + @pytest.mark.asyncio async def test_ctor_background_channel_refresh(): # should create background tasks for each channel @@ -220,6 +225,7 @@ async def test_ctor_background_channel_refresh(): ping_and_warm.assert_any_call(channel) await client.close() + @pytest.mark.asyncio async def test__ping_and_warm_instances(): # test with no instances @@ -251,15 +257,14 @@ async def test__ping_and_warm_instances(): call._request["name"] = client._active_instances[idx] await client.close() + @pytest.mark.asyncio async def test__manage_channel_first_sleep(): # first sleep time should be `refresh_interval` seconds after client init import time from collections import namedtuple - params = namedtuple( - "params", ["refresh_interval", "wait_time", "expected_sleep"] - ) + params = namedtuple("params", ["refresh_interval", "wait_time", "expected_sleep"]) test_params = [ params(refresh_interval=0, wait_time=0, expected_sleep=0), params(refresh_interval=0, wait_time=1, expected_sleep=0), @@ -281,9 +286,10 @@ async def test__manage_channel_first_sleep(): pass sleep.assert_called_once() call_time = sleep.call_args[0][0] - assert abs(call_time-expected_sleep) < 0.1, f"params={params}" + assert abs(call_time - expected_sleep) < 0.1, f"params={params}" await client.close() 
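# A minimal sketch, not part of the patch itself: in the unittest-to-pytest
# conversion shown in these hunks, assertAlmostEqual(..., delta=0.1) becomes a
# plain `abs(a - b) < 0.1` assertion. pytest.approx with an absolute tolerance
# expresses the same bound; the test name and values below are hypothetical,
# for illustration only.
import pytest

def test_sleep_time_within_tolerance():
    call_time, expected_sleep = 9.97, 10.0
    # equivalent to: assert abs(call_time - expected_sleep) < 0.1
    assert call_time == pytest.approx(expected_sleep, abs=0.1)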
+ @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -314,9 +320,7 @@ async def test__manage_channel_ping_and_warm(): pass assert ping_and_warm.call_count == 2 assert old_channel != new_channel - called_with = [ - call[0][0] for call in ping_and_warm.call_args_list - ] + called_with = [call[0][0] for call in ping_and_warm.call_args_list] assert old_channel in called_with assert new_channel in called_with # should ping and warm instantly new channel only if not sleeping @@ -330,15 +334,14 @@ async def test__manage_channel_ping_and_warm(): ping_and_warm.assert_called_once_with(new_channel) await client.close() + @pytest.mark.asyncio async def test__manage_channel_sleeps(): # make sure that sleeps work as expected from collections import namedtuple import time - params = namedtuple( - "params", ["refresh_interval", "num_cycles", "expected_sleep"] - ) + params = namedtuple("params", ["refresh_interval", "num_cycles", "expected_sleep"]) test_params = [ params(refresh_interval=None, num_cycles=1, expected_sleep=60 * 45), params(refresh_interval=10, num_cycles=10, expected_sleep=100), @@ -362,11 +365,12 @@ async def test__manage_channel_sleeps(): pass assert sleep.call_count == num_cycles total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - assert abs(total_sleep-expected_sleep) < \ - 0.1, \ - f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" await client.close() + @pytest.mark.asyncio async def test__manage_channel_refresh(): # make sure that channels are properly refreshed @@ -409,6 +413,7 @@ async def test__manage_channel_refresh(): assert call[0][2] == new_channel await client.close() + @pytest.mark.asyncio async def test_register_instance_ping_and_warm(): # should ping and warm each new instance @@ -417,9 +422,7 @@ async def test_register_instance_ping_and_warm(): assert len(client._channel_refresh_tasks) == pool_size assert not client._active_instances # next calls should trigger ping and warm - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_mock: + with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: # new instance should trigger ping and warm await client.register_instance("instance-1") assert ping_mock.call_count == pool_size @@ -430,6 +433,7 @@ async def test_register_instance_ping_and_warm(): assert ping_mock.call_count == pool_size * 2 await client.close() + @pytest.mark.asyncio async def test_remove_instance_registration(): client = _make_one(project="project-id") @@ -445,6 +449,7 @@ async def test_remove_instance_registration(): assert len(client._active_instances) == 1 await client.close() + @pytest.mark.asyncio async def test_get_table(): from google.cloud.bigtable.client import Table @@ -472,6 +477,7 @@ async def test_get_table(): assert full_instance_name in client._active_instances await client.close() + @pytest.mark.asyncio async def test_multiple_pool_sizes(): # should be able to create multiple clients with different pool sizes without issue @@ -485,16 +491,22 @@ async def test_multiple_pool_sizes(): await client.close() await client_duplicate.close() + @pytest.mark.asyncio async def test_close(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import 
PooledBigtableGrpcAsyncIOTransport + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) assert len(client._channel_refresh_tasks) == pool_size tasks_list = list(client._channel_refresh_tasks) for task in client._channel_refresh_tasks: assert not task.done() - with mock.patch.object(PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock()) as close_mock: + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock() + ) as close_mock: await client.close() close_mock.assert_called_once() close_mock.assert_awaited() @@ -503,6 +515,7 @@ async def test_close(): assert task.cancelled() assert client._channel_refresh_tasks == [] + @pytest.mark.asyncio async def test_close_with_timeout(): pool_size = 7 @@ -514,6 +527,7 @@ async def test_close_with_timeout(): wait_for_mock.assert_awaited() assert wait_for_mock.call_args[1]["timeout"] == expected_timeout + @pytest.mark.asyncio async def test___del__(): # no warnings on __del__ after close @@ -521,10 +535,12 @@ async def test___del__(): client = _make_one(project="project-id", pool_size=pool_size) await client.close() + @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::UserWarning") async def test___del____no_close(): import warnings + # if client is garbage collected before being closed, it should raise a warning pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) @@ -539,6 +555,7 @@ async def test___del____no_close(): for i in range(pool_size): assert client._channel_refresh_tasks[i].cancel.call_count == 1 + @pytest.mark.asyncio async def test_context_manager(): # context manager should close the client cleanly @@ -557,6 +574,7 @@ async def test_context_manager(): # actually close the client await true_close + def test_client_ctor_sync(): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import BigtableDataClient @@ -570,6 +588,7 @@ def test_client_ctor_sync(): # Table Tests ###################################################################### + @pytest.mark.asyncio async def test_table_ctor(): from google.cloud.bigtable.client import BigtableDataClient @@ -604,6 +623,7 @@ async def test_table_ctor(): assert table._register_instance_task.exception() is None await client.close() + def test_table_ctor_sync(): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import Table @@ -612,5 +632,3 @@ def test_table_ctor_sync(): with pytest.raises(RuntimeError) as err: Table(client, "instance-id", "table-id") assert "event loop" in str(err.value) - - From d65b432cea21fefbed6b925e6b62bf80d239a747 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 14:28:00 -0700 Subject: [PATCH 124/349] use paramaterize in tests --- tests/unit/test_client.py | 105 +++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f157f6ee6..abc1c0e9b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -259,36 +259,35 @@ async def test__ping_and_warm_instances(): @pytest.mark.asyncio -async def test__manage_channel_first_sleep(): +@pytest.mark.parametrize( + "refresh_interval, wait_time, expected_sleep", + [ + (0,0,0), + (0,1,0), + (10,0,10), + (10,5,5), + (10,10,0), + (10,15,0), + ] +) +async def 
test__manage_channel_first_sleep(refresh_interval, wait_time, expected_sleep): # first sleep time should be `refresh_interval` seconds after client init import time - from collections import namedtuple - - params = namedtuple("params", ["refresh_interval", "wait_time", "expected_sleep"]) - test_params = [ - params(refresh_interval=0, wait_time=0, expected_sleep=0), - params(refresh_interval=0, wait_time=1, expected_sleep=0), - params(refresh_interval=10, wait_time=0, expected_sleep=10), - params(refresh_interval=10, wait_time=5, expected_sleep=5), - params(refresh_interval=10, wait_time=10, expected_sleep=0), - params(refresh_interval=10, wait_time=15, expected_sleep=0), - ] with mock.patch.object(time, "time") as time: time.return_value = 0 - for refresh_interval, wait_time, expected_sleep in test_params: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = asyncio.CancelledError - try: - client = _make_one(project="project-id") - client._channel_init_time = -wait_time - await client._manage_channel(0, refresh_interval) - except asyncio.CancelledError: - pass - sleep.assert_called_once() - call_time = sleep.call_args[0][0] - assert abs(call_time - expected_sleep) < 0.1, f"params={params}" - await client.close() - + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = asyncio.CancelledError + try: + client = _make_one(project="project-id") + client._channel_init_time = -wait_time + await client._manage_channel(0, refresh_interval) + except asyncio.CancelledError: + pass + sleep.assert_called_once() + call_time = sleep.call_args[0][0] + assert abs(call_time - expected_sleep) < 0.1, \ + f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" + await client.close() @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(): @@ -336,39 +335,39 @@ async def test__manage_channel_ping_and_warm(): @pytest.mark.asyncio -async def test__manage_channel_sleeps(): +@pytest.mark.parametrize( + "refresh_interval, num_cycles, expected_sleep", + [ + (None, 1, 60*45), + (10, 10, 100), + (10, 1, 10), + ] +) +async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sleep): # make sure that sleeps work as expected - from collections import namedtuple import time - params = namedtuple("params", ["refresh_interval", "num_cycles", "expected_sleep"]) - test_params = [ - params(refresh_interval=None, num_cycles=1, expected_sleep=60 * 45), - params(refresh_interval=10, num_cycles=10, expected_sleep=100), - params(refresh_interval=10, num_cycles=1, expected_sleep=10), - ] channel_idx = 1 with mock.patch.object(time, "time") as time: time.return_value = 0 - for refresh_interval, num_cycles, expected_sleep in test_params: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError - ] - try: - client = _make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel(channel_idx, refresh_interval) - else: - await client._manage_channel(channel_idx) - except asyncio.CancelledError: - pass - assert sleep.call_count == num_cycles - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - assert ( - abs(total_sleep - expected_sleep) < 0.1 - ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - await client.close() + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + 
asyncio.CancelledError + ] + try: + client = _make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel(channel_idx, refresh_interval) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + await client.close() @pytest.mark.asyncio From 4b63d8748e165588fa59b2056f124dc60d238fde Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 15:21:15 -0700 Subject: [PATCH 125/349] improved some tests --- google/cloud/bigtable/client.py | 32 +++++++++------- tests/unit/test_client.py | 67 +++++++++++++++++---------------- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e8b6e9ff0..14b6021bc 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -123,9 +123,19 @@ def __init__( ) self._channel_refresh_tasks.append(refresh_task) + @property + def transport(self) -> PooledBigtableGrpcAsyncIOTransport: + """Returns the transport used by the client instance. + Returns: + BigtableTransport: The transport used by the client instance. + """ + return cast(PooledBigtableGrpcAsyncIOTransport, self._client.transport) + def __del__(self): """ - Clean up background tasks + Call close on garbage collection + + Raise warming if background tasks are still running """ if hasattr(self, "_channel_refresh_tasks") and self._channel_refresh_tasks: warnings.warn( @@ -133,8 +143,6 @@ def __del__(self): "being closed. Please call the close() method to ensure all " "background tasks are cancelled." 
) - for task in self._channel_refresh_tasks: - task.cancel() async def close(self, timeout: float = 2.0): """ @@ -192,23 +200,22 @@ async def _manage_channel( grace_period: time to allow previous channel to serve existing requests before closing, in seconds """ - transport = cast(PooledBigtableGrpcAsyncIOTransport, self.transport) first_refresh = self._channel_init_time + refresh_interval next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately - channel = transport.channel_pool[channel_idx] + channel = self.transport.channel_pool[channel_idx] await self._ping_and_warm_instances(channel) # continuously refresh the channel every `refresh_interval` seconds while True: await asyncio.sleep(next_sleep) # prepare new channel for use - new_channel = transport.create_channel( + new_channel = self.transport.create_channel( self.transport._host, - credentials=transport._credentials, - scopes=transport._scopes, - ssl_credentials=transport._ssl_channel_credentials, - quota_project_id=transport._quota_project_id, + credentials=self.transport._credentials, + scopes=self.transport._scopes, + ssl_credentials=self.transport._ssl_channel_credentials, + quota_project_id=self.transport._quota_project_id, options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), @@ -217,7 +224,7 @@ async def _manage_channel( await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - await transport.replace_channel(channel_idx, grace_period, new_channel) + await self.transport.replace_channel(channel_idx, grace_period, new_channel) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) @@ -229,12 +236,11 @@ async def register_instance(self, instance_id: str): requests, and new channels will be warmed for each registered instance Channels will not be refreshed unless at least one instance is registered """ - transport = cast(PooledBigtableGrpcAsyncIOTransport, self.transport) instance_name = self.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) # call ping and warm on all existing channels - for channel in transport.channel_pool: + for channel in self.transport.channel_pool: await self._ping_and_warm_instances(channel) async def remove_instance_registration(self, instance_id: str) -> bool: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index abc1c0e9b..63a55e3ba 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -156,8 +156,10 @@ async def test_channel_pool_creation(): with mock.patch.object( PooledBigtableGrpcAsyncIOTransport, "create_channel" ) as create_channel: + create_channel.return_value = AsyncMock() client = _make_one(project="project-id", pool_size=pool_size) assert create_channel.call_count == pool_size + await client.close() # channels should be unique client = _make_one(project="project-id", pool_size=pool_size) pool_list = list(client.transport.channel_pool) @@ -229,32 +231,30 @@ async def test_ctor_background_channel_refresh(): @pytest.mark.asyncio async def test__ping_and_warm_instances(): # test with no instances - gather = AsyncMock() - asyncio.gather = gather - client = _make_one(project="project-id", pool_size=1) - channel = client.transport.channel_pool[0] - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - 
gather.assert_awaited_once() - assert not gather.call_args.args - assert gather.call_args.kwargs == {"return_exceptions": True} - # test with instances - client._active_instances = [ - "instance-1", - "instance-2", - "instance-3", - "instance-4", - ] - gather = AsyncMock() - asyncio.gather = gather - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - assert len(gather.call_args.args) == 4 - assert gather.call_args.kwargs == {"return_exceptions": True} - for idx, call in enumerate(gather.call_args.args): - assert isinstance(call, grpc.aio.UnaryUnaryCall) - call._request["name"] = client._active_instances[idx] + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + client = _make_one(project="project-id", pool_size=1) + channel = client.transport.channel_pool[0] + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert not gather.call_args.args + assert gather.call_args.kwargs == {"return_exceptions": True} + # test with instances + client._active_instances = [ + "instance-1", + "instance-2", + "instance-3", + "instance-4", + ] + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert len(gather.call_args.args) == 4 + assert gather.call_args.kwargs == {"return_exceptions": True} + for idx, call in enumerate(gather.call_args.args): + assert isinstance(call, grpc.aio.UnaryUnaryCall) + call._request["name"] = client._active_instances[idx] await client.close() @@ -367,7 +367,9 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle assert ( abs(total_sleep - expected_sleep) < 0.1 ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - await client.close() + print(client._channel_refresh_tasks) + breakpoint() + await client.close() @pytest.mark.asyncio @@ -520,11 +522,14 @@ async def test_close_with_timeout(): pool_size = 7 expected_timeout = 19 client = _make_one(project="project-id", pool_size=pool_size) - with mock.patch.object(asyncio, "wait_for") as wait_for_mock: + tasks = list(client._channel_refresh_tasks) + with mock.patch.object(asyncio, "wait_for", AsyncMock()) as wait_for_mock: await client.close(timeout=expected_timeout) wait_for_mock.assert_called_once() wait_for_mock.assert_awaited() assert wait_for_mock.call_args[1]["timeout"] == expected_timeout + client._channel_refresh_tasks = tasks + await client.close() @pytest.mark.asyncio @@ -543,16 +548,12 @@ async def test___del____no_close(): # if client is garbage collected before being closed, it should raise a warning pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) - # replace tasks with mocks - await client.close() - client._channel_refresh_tasks = [mock.Mock() for i in range(pool_size)] assert len(client._channel_refresh_tasks) == pool_size with pytest.warns(UserWarning) as warnings: client.__del__() assert len(warnings) == 1 assert "Please call the close() method" in str(warnings[0].message) - for i in range(pool_size): - assert client._channel_refresh_tasks[i].cancel.call_count == 1 + await client.close() @pytest.mark.asyncio From 4ccc42115728b289087cc85497ec7652cfea6e4f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 15:49:23 -0700 Subject: [PATCH 126/349] went back to init without event loop raising warning --- google/cloud/bigtable/client.py | 55 
++++++++++----- tests/unit/test_client.py | 116 ++++++++++++++++++++++---------- 2 files changed, 120 insertions(+), 51 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 14b6021bc..d8d7865d9 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -83,11 +83,6 @@ def __init__( - RuntimeError if called outside of an async run loop context - ValueError if pool_size is less than 1 """ - # check if in async run loop context - if not asyncio.get_running_loop(): - raise RuntimeError( - "BigtableDataClient must be created within an async run loop context" - ) # set up transport in registry transport_str = f"pooled_grpc_asyncio_{pool_size}" transport = PooledBigtableGrpcAsyncIOTransport.with_fixed_size(pool_size) @@ -111,17 +106,32 @@ def __init__( client_options=client_options, client_info=client_info, ) - self.metadata = metadata + self.metadata = metadata or [] # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] - for channel_idx in range(pool_size): - refresh_task = asyncio.create_task( - self._manage_channel(channel_idx), name=f"channel_refresh_{channel_idx}" + try: + self.start_background_channel_refresh() + except RuntimeError: + warnings.warn( + "BigtableDataClient should be started in an " + "asyncio event loop. Channel refresh will not be started" ) - self._channel_refresh_tasks.append(refresh_task) + + def start_background_channel_refresh(self) -> None: + """ + Starts a background task to ping and warm each channel in the pool + Raises: + - RuntimeError if not called in an asyncio event loop + """ + if not self._channel_refresh_tasks: + # raise RuntimeError if there is no event loop + asyncio.get_running_loop() + for channel_idx in range(len(self.transport.channel_pool)): + refresh_task = asyncio.create_task(self._manage_channel(channel_idx), name=f"BigtableDataClient channel refresh {channel_idx}") + self._channel_refresh_tasks.append(refresh_task) @property def transport(self) -> PooledBigtableGrpcAsyncIOTransport: @@ -239,9 +249,14 @@ async def register_instance(self, instance_id: str): instance_name = self.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) - # call ping and warm on all existing channels - for channel in self.transport.channel_pool: - await self._ping_and_warm_instances(channel) + if self._channel_refresh_tasks: + # refresh tasks already running + # call ping and warm on all existing channels + for channel in self.transport.channel_pool: + await self._ping_and_warm_instances(channel) + else: + # refresh tasks aren't active. start them as background tasks + self.start_background_channel_refresh() async def remove_instance_registration(self, instance_id: str) -> bool: """ @@ -320,11 +335,17 @@ def __init__( self.instance = instance_id self.table_id = table_id self.app_profile_id = app_profile_id - self.metadata = metadata + self.metadata = metadata or [] # raises RuntimeError if called outside of an async run loop context - self._register_instance_task = asyncio.create_task( - self.client.register_instance(instance_id) - ) + try: + self._register_instance_task = asyncio.create_task( + self.client.register_instance(instance_id) + ) + except RuntimeError: + warnings.warn( + "Table should be created in an asyncio event loop." 
+ " Instance will not be registered with client for refresh" + ) async def read_rows_stream( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 63a55e3ba..641c6ccaf 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -113,16 +113,18 @@ async def test_ctor_dict_options(): BigtableAsyncClient, ) from google.api_core.client_options import ClientOptions + from google.cloud.bigtable.client import BigtableDataClient client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - _make_one(client_options=client_options) - bigtable_client_init.assert_called_once() - kwargs = bigtable_client_init.call_args[1] - called_options = kwargs["client_options"] - assert called_options.api_endpoint == "foo.bar:1234" - assert isinstance(called_options, ClientOptions) - + with mock.patch.object(BigtableDataClient, "start_background_channel_refresh") as start_background_refresh: + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + _make_one(client_options=client_options) + bigtable_client_init.assert_called_once() + kwargs = bigtable_client_init.call_args[1] + called_options = kwargs["client_options"] + assert called_options.api_endpoint == "foo.bar:1234" + assert isinstance(called_options, ClientOptions) + start_background_refresh.assert_called_once() @pytest.mark.asyncio async def test_veneer_grpc_headers(): @@ -210,22 +212,37 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] != start_pool[i] await client.close() +def test_start_background_channel_refresh_sync(): + # should raise RuntimeError if called in a sync context + client = _make_one(project="project-id") + with pytest.raises(RuntimeError): + client.start_background_channel_refresh() + +@pytest.mark.asyncio +async def test_start_background_channel_refresh_tasks_exist(): + # if tasks exist, should do nothing + client = _make_one(project="project-id") + with mock.patch.object(asyncio, "create_task") as create_task: + client.start_background_channel_refresh() + create_task.assert_not_called() + await client.close() @pytest.mark.asyncio -async def test_ctor_background_channel_refresh(): +@pytest.mark.parametrize("pool_size", [1, 3, 7]) +async def test_start_background_channel_refresh(pool_size): # should create background tasks for each channel - for pool_size in [1, 3, 7]: - client = _make_one(project="project-id", pool_size=pool_size) - ping_and_warm = AsyncMock() - client._ping_and_warm_instances = ping_and_warm - assert len(client._channel_refresh_tasks) == pool_size - for task in client._channel_refresh_tasks: - assert isinstance(task, asyncio.Task) - await asyncio.sleep(0.1) - assert ping_and_warm.call_count == pool_size - for channel in client.transport.channel_pool: - ping_and_warm.assert_any_call(channel) - await client.close() + client = _make_one(project="project-id", pool_size=pool_size) + ping_and_warm = AsyncMock() + client._ping_and_warm_instances = ping_and_warm + client.start_background_channel_refresh() + assert len(client._channel_refresh_tasks) == pool_size + for task in client._channel_refresh_tasks: + assert isinstance(task, asyncio.Task) + await asyncio.sleep(0.1) + assert ping_and_warm.call_count == pool_size + for channel in client.transport.channel_pool: + ping_and_warm.assert_any_call(channel) + await client.close() @pytest.mark.asyncio @@ -367,8 +384,6 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle assert ( 
abs(total_sleep - expected_sleep) < 0.1 ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - print(client._channel_refresh_tasks) - breakpoint() await client.close() @@ -414,23 +429,48 @@ async def test__manage_channel_refresh(): assert call[0][2] == new_channel await client.close() +@pytest.mark.asyncio +async def test_register_instance(): + # create the client without calling start_background_channel_refresh + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = _make_one(project="project-id") + assert not client._channel_refresh_tasks + # first call should start background refresh + assert client._active_instances == set() + await client.register_instance("instance-1") + assert len(client._active_instances) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-1"} + assert client._channel_refresh_tasks + # next call should not + with mock.patch.object(type(_make_one()), "start_background_channel_refresh") as refresh_mock: + await client.register_instance("instance-2") + assert len(client._active_instances) == 2 + assert client._active_instances == {"projects/project-id/instances/instance-1", "projects/project-id/instances/instance-2"} + refresh_mock.assert_not_called() @pytest.mark.asyncio async def test_register_instance_ping_and_warm(): # should ping and warm each new instance pool_size = 7 + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = _make_one(project="project-id", pool_size=pool_size) + # first call should start background refresh + assert not client._channel_refresh_tasks + await client.register_instance("instance-1") client = _make_one(project="project-id", pool_size=pool_size) assert len(client._channel_refresh_tasks) == pool_size assert not client._active_instances # next calls should trigger ping and warm with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: # new instance should trigger ping and warm - await client.register_instance("instance-1") - assert ping_mock.call_count == pool_size await client.register_instance("instance-2") + assert ping_mock.call_count == pool_size + await client.register_instance("instance-3") assert ping_mock.call_count == pool_size * 2 # duplcate instances should not trigger ping and warm - await client.register_instance("instance-2") + await client.register_instance("instance-3") assert ping_mock.call_count == pool_size * 2 await client.close() @@ -533,11 +573,13 @@ async def test_close_with_timeout(): @pytest.mark.asyncio -async def test___del__(): +async def test___del__(recwarn): # no warnings on __del__ after close pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) + assert len(recwarn) == 0 await client.close() + assert len(recwarn) == 0 @pytest.mark.asyncio @@ -579,10 +621,11 @@ def test_client_ctor_sync(): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import BigtableDataClient - with pytest.raises(RuntimeError) as err: - BigtableDataClient(project="project-id") - assert "event loop" in str(err.value) - + with pytest.warns(UserWarning) as warnings: + client = BigtableDataClient(project="project-id") + assert "event loop" in str(warnings[0].message) + assert client.project == "project-id" + assert client._channel_refresh_tasks == [] 
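# A minimal usage sketch of the behavior exercised by test_client_ctor_sync above
# (illustrative only, not part of the patch): constructing BigtableDataClient outside
# an event loop now only emits a warning, and channel refresh can be started later
# from async code. All names below come from the API shown in this patch series;
# the project/instance values are placeholder assumptions.
def _example_deferred_refresh():
    import asyncio
    import warnings
    from google.cloud.bigtable.client import BigtableDataClient

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # constructed in a sync context: no background refresh tasks are running yet
        client = BigtableDataClient(project="project-id")

    async def run():
        # inside a running loop, background refresh can be started explicitly,
        # or implicitly by registering an instance
        client.start_background_channel_refresh()
        await client.register_instance("instance-id")
        await client.close()

    asyncio.run(run())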
###################################################################### # Table Tests @@ -629,6 +672,11 @@ def test_table_ctor_sync(): from google.cloud.bigtable.client import Table client = mock.Mock() - with pytest.raises(RuntimeError) as err: - Table(client, "instance-id", "table-id") - assert "event loop" in str(err.value) + with pytest.warns(UserWarning) as warnings: + table = Table(client, "instance-id", "table-id") + assert "event loop" in str(warnings[0].message) + assert table.table_id == "table-id" + assert table.instance == "instance-id" + assert table.app_profile_id is None + assert table.metadata == [] + assert table.client is client From 8001240e2d122438fabef7f47e53d16fb2bf2694 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:07:25 -0700 Subject: [PATCH 127/349] removed __del__ --- google/cloud/bigtable/client.py | 13 ----- tests/unit/test_client.py | 89 ++++++++++++--------------------- 2 files changed, 33 insertions(+), 69 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d8d7865d9..4c60e3de4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -141,19 +141,6 @@ def transport(self) -> PooledBigtableGrpcAsyncIOTransport: """ return cast(PooledBigtableGrpcAsyncIOTransport, self._client.transport) - def __del__(self): - """ - Call close on garbage collection - - Raise warming if background tasks are still running - """ - if hasattr(self, "_channel_refresh_tasks") and self._channel_refresh_tasks: - warnings.warn( - "BigtableDataClient instance is being garbage collected without " - "being closed. Please call the close() method to ensure all " - "background tasks are cancelled." - ) - async def close(self, timeout: float = 2.0): """ Cancel all background tasks diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 641c6ccaf..fb51652bd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -212,6 +212,7 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] != start_pool[i] await client.close() +@pytest.mark.filterwarnings("ignore::UserWarning") def test_start_background_channel_refresh_sync(): # should raise RuntimeError if called in a sync context client = _make_one(project="project-id") @@ -388,7 +389,8 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle @pytest.mark.asyncio -async def test__manage_channel_refresh(): +@pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) +async def test__manage_channel_refresh(num_cycles): # make sure that channels are properly refreshed from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, @@ -399,37 +401,37 @@ async def test__manage_channel_refresh(): channel_idx = 1 new_channel = grpc.aio.insecure_channel("localhost:8080") - for num_cycles in [0, 1, 10, 100]: - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles)] + [ - asyncio.CancelledError - ] - client = _make_one(project="project-id") - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: - create_channel.return_value = new_channel - try: - await client._manage_channel( - channel_idx, - refresh_interval=expected_refresh, - grace_period=expected_grace, - ) - except asyncio.CancelledError: - pass - assert 
sleep.call_count == num_cycles + 1 - assert create_channel.call_count == num_cycles - assert replace_channel.call_count == num_cycles - for call in replace_channel.call_args_list: - assert call[0][0] == channel_idx - assert call[0][1] == expected_grace - assert call[0][2] == new_channel - await client.close() + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles)] + [ + asyncio.CancelledError + ] + client = _make_one(project="project-id") + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "create_channel" + ) as create_channel: + create_channel.return_value = new_channel + try: + await client._manage_channel( + channel_idx, + refresh_interval=expected_refresh, + grace_period=expected_grace, + ) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + 1 + assert create_channel.call_count == num_cycles + assert replace_channel.call_count == num_cycles + for call in replace_channel.call_args_list: + assert call[0][0] == channel_idx + assert call[0][1] == expected_grace + assert call[0][2] == new_channel + await client.close() @pytest.mark.asyncio +@pytest.mark.filterwarnings("ignore::UserWarning") async def test_register_instance(): # create the client without calling start_background_channel_refresh with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: @@ -450,6 +452,7 @@ async def test_register_instance(): refresh_mock.assert_not_called() @pytest.mark.asyncio +@pytest.mark.filterwarnings("ignore::UserWarning") async def test_register_instance_ping_and_warm(): # should ping and warm each new instance pool_size = 7 @@ -572,32 +575,6 @@ async def test_close_with_timeout(): await client.close() -@pytest.mark.asyncio -async def test___del__(recwarn): - # no warnings on __del__ after close - pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - assert len(recwarn) == 0 - await client.close() - assert len(recwarn) == 0 - - -@pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::UserWarning") -async def test___del____no_close(): - import warnings - - # if client is garbage collected before being closed, it should raise a warning - pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - with pytest.warns(UserWarning) as warnings: - client.__del__() - assert len(warnings) == 1 - assert "Please call the close() method" in str(warnings[0].message) - await client.close() - - @pytest.mark.asyncio async def test_context_manager(): # context manager should close the client cleanly From 7c9cea73253e4e71c1890b0d84d861e1461094e0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:13:48 -0700 Subject: [PATCH 128/349] changed warning type --- google/cloud/bigtable/client.py | 6 ++++-- tests/unit/test_client.py | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4c60e3de4..2d9fb8c04 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -117,7 +117,8 @@ def __init__( except RuntimeError: warnings.warn( "BigtableDataClient should be started in an " - "asyncio event loop. Channel refresh will not be started" + "asyncio event loop. 
Channel refresh will not be started", + RuntimeWarning ) def start_background_channel_refresh(self) -> None: @@ -331,7 +332,8 @@ def __init__( except RuntimeError: warnings.warn( "Table should be created in an asyncio event loop." - " Instance will not be registered with client for refresh" + " Instance will not be registered with client for refresh", + RuntimeWarning, ) async def read_rows_stream( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index fb51652bd..d5c959d1c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -212,7 +212,7 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] != start_pool[i] await client.close() -@pytest.mark.filterwarnings("ignore::UserWarning") +@pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_start_background_channel_refresh_sync(): # should raise RuntimeError if called in a sync context client = _make_one(project="project-id") @@ -431,7 +431,7 @@ async def test__manage_channel_refresh(num_cycles): await client.close() @pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::UserWarning") +@pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test_register_instance(): # create the client without calling start_background_channel_refresh with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: @@ -452,7 +452,7 @@ async def test_register_instance(): refresh_mock.assert_not_called() @pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::UserWarning") +@pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test_register_instance_ping_and_warm(): # should ping and warm each new instance pool_size = 7 @@ -598,7 +598,7 @@ def test_client_ctor_sync(): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import BigtableDataClient - with pytest.warns(UserWarning) as warnings: + with pytest.warns(RuntimeWarning) as warnings: client = BigtableDataClient(project="project-id") assert "event loop" in str(warnings[0].message) assert client.project == "project-id" @@ -649,7 +649,7 @@ def test_table_ctor_sync(): from google.cloud.bigtable.client import Table client = mock.Mock() - with pytest.warns(UserWarning) as warnings: + with pytest.warns(RuntimeWarning) as warnings: table = Table(client, "instance-id", "table-id") assert "event loop" in str(warnings[0].message) assert table.table_id == "table-id" From 3bbebead76d680de1d5b0d2ec00c92fe85e8ae17 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:24:23 -0700 Subject: [PATCH 129/349] ran blacken --- google/cloud/bigtable/client.py | 7 ++-- tests/unit/test_client.py | 57 +++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2d9fb8c04..1aa1f19f7 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -118,7 +118,7 @@ def __init__( warnings.warn( "BigtableDataClient should be started in an " "asyncio event loop. 
Channel refresh will not be started", - RuntimeWarning + RuntimeWarning, ) def start_background_channel_refresh(self) -> None: @@ -131,7 +131,10 @@ def start_background_channel_refresh(self) -> None: # raise RuntimeError if there is no event loop asyncio.get_running_loop() for channel_idx in range(len(self.transport.channel_pool)): - refresh_task = asyncio.create_task(self._manage_channel(channel_idx), name=f"BigtableDataClient channel refresh {channel_idx}") + refresh_task = asyncio.create_task( + self._manage_channel(channel_idx), + name=f"BigtableDataClient channel refresh {channel_idx}", + ) self._channel_refresh_tasks.append(refresh_task) @property diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d5c959d1c..f324abfc4 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -116,7 +116,9 @@ async def test_ctor_dict_options(): from google.cloud.bigtable.client import BigtableDataClient client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableDataClient, "start_background_channel_refresh") as start_background_refresh: + with mock.patch.object( + BigtableDataClient, "start_background_channel_refresh" + ) as start_background_refresh: with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: _make_one(client_options=client_options) bigtable_client_init.assert_called_once() @@ -126,6 +128,7 @@ async def test_ctor_dict_options(): assert isinstance(called_options, ClientOptions) start_background_refresh.assert_called_once() + @pytest.mark.asyncio async def test_veneer_grpc_headers(): # client_info should be populated with headers to @@ -212,6 +215,7 @@ async def test_channel_pool_replace(): assert client.transport.channel_pool[i] != start_pool[i] await client.close() + @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_start_background_channel_refresh_sync(): # should raise RuntimeError if called in a sync context @@ -219,6 +223,7 @@ def test_start_background_channel_refresh_sync(): with pytest.raises(RuntimeError): client.start_background_channel_refresh() + @pytest.mark.asyncio async def test_start_background_channel_refresh_tasks_exist(): # if tasks exist, should do nothing @@ -228,6 +233,7 @@ async def test_start_background_channel_refresh_tasks_exist(): create_task.assert_not_called() await client.close() + @pytest.mark.asyncio @pytest.mark.parametrize("pool_size", [1, 3, 7]) async def test_start_background_channel_refresh(pool_size): @@ -278,19 +284,20 @@ async def test__ping_and_warm_instances(): @pytest.mark.asyncio @pytest.mark.parametrize( - "refresh_interval, wait_time, expected_sleep", - [ - (0,0,0), - (0,1,0), - (10,0,10), - (10,5,5), - (10,10,0), - (10,15,0), - ] + "refresh_interval, wait_time, expected_sleep", + [ + (0, 0, 0), + (0, 1, 0), + (10, 0, 10), + (10, 5, 5), + (10, 10, 0), + (10, 15, 0), + ], ) async def test__manage_channel_first_sleep(refresh_interval, wait_time, expected_sleep): # first sleep time should be `refresh_interval` seconds after client init import time + with mock.patch.object(time, "time") as time: time.return_value = 0 with mock.patch.object(asyncio, "sleep") as sleep: @@ -303,10 +310,12 @@ async def test__manage_channel_first_sleep(refresh_interval, wait_time, expected pass sleep.assert_called_once() call_time = sleep.call_args[0][0] - assert abs(call_time - expected_sleep) < 0.1, \ - f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" + assert ( + abs(call_time - expected_sleep) < 0.1 + ), 
f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" await client.close() + @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -354,12 +363,12 @@ async def test__manage_channel_ping_and_warm(): @pytest.mark.asyncio @pytest.mark.parametrize( - "refresh_interval, num_cycles, expected_sleep", - [ - (None, 1, 60*45), - (10, 10, 100), - (10, 1, 10), - ] + "refresh_interval, num_cycles, expected_sleep", + [ + (None, 1, 60 * 45), + (10, 10, 100), + (10, 1, 10), + ], ) async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sleep): # make sure that sleeps work as expected @@ -430,6 +439,7 @@ async def test__manage_channel_refresh(num_cycles): assert call[0][2] == new_channel await client.close() + @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test_register_instance(): @@ -445,12 +455,18 @@ async def test_register_instance(): assert client._active_instances == {"projects/project-id/instances/instance-1"} assert client._channel_refresh_tasks # next call should not - with mock.patch.object(type(_make_one()), "start_background_channel_refresh") as refresh_mock: + with mock.patch.object( + type(_make_one()), "start_background_channel_refresh" + ) as refresh_mock: await client.register_instance("instance-2") assert len(client._active_instances) == 2 - assert client._active_instances == {"projects/project-id/instances/instance-1", "projects/project-id/instances/instance-2"} + assert client._active_instances == { + "projects/project-id/instances/instance-1", + "projects/project-id/instances/instance-2", + } refresh_mock.assert_not_called() + @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test_register_instance_ping_and_warm(): @@ -604,6 +620,7 @@ def test_client_ctor_sync(): assert client.project == "project-id" assert client._channel_refresh_tasks == [] + ###################################################################### # Table Tests ###################################################################### From 8bff9d03453e8805759a7a2c08bd04818f630739 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:27:35 -0700 Subject: [PATCH 130/349] improved task naming --- google/cloud/bigtable/client.py | 8 +++++--- tests/unit/test_client.py | 21 +++++++++++++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1aa1f19f7..657a6f25f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -21,6 +21,7 @@ import grpc import time import warnings +import sys from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -116,7 +117,7 @@ def __init__( self.start_background_channel_refresh() except RuntimeError: warnings.warn( - "BigtableDataClient should be started in an " + f"{self.__class__.__name__} should be started in an " "asyncio event loop. 
Channel refresh will not be started", RuntimeWarning, ) @@ -132,9 +133,10 @@ def start_background_channel_refresh(self) -> None: asyncio.get_running_loop() for channel_idx in range(len(self.transport.channel_pool)): refresh_task = asyncio.create_task( - self._manage_channel(channel_idx), - name=f"BigtableDataClient channel refresh {channel_idx}", + self._manage_channel(channel_idx) ) + if sys.version_info >= (3, 8): + refresh_task.set_name(f"{self.__class__.__name__} channel refresh {channel_idx}") self._channel_refresh_tasks.append(refresh_task) @property diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f324abfc4..99d9b5354 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -13,10 +13,10 @@ # limitations under the License. -import unittest import grpc import asyncio import re +import sys from google.auth.credentials import AnonymousCredentials import pytest @@ -252,6 +252,21 @@ async def test_start_background_channel_refresh(pool_size): await client.close() +@pytest.mark.asyncio +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" +) +async def test_start_background_channel_refresh_tasks_names(): + # if tasks exist, should do nothing + pool_size = 3 + client = _make_one(project="project-id", pool_size=pool_size) + for i in range(pool_size): + name = client._channel_refresh_tasks[i].get_name() + assert str(i) in name + assert "BigtableDataClient channel refresh " in name + await client.close() + + @pytest.mark.asyncio async def test__ping_and_warm_instances(): # test with no instances @@ -616,7 +631,9 @@ def test_client_ctor_sync(): with pytest.warns(RuntimeWarning) as warnings: client = BigtableDataClient(project="project-id") - assert "event loop" in str(warnings[0].message) + assert "BigtableDataClient should be started in an asyncio event loop." 
in str( + warnings[0].message + ) assert client.project == "project-id" assert client._channel_refresh_tasks == [] From 4ae2146fc85fc82466a44acac89a4512fc22e24a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:32:58 -0700 Subject: [PATCH 131/349] fixed style issues --- google/cloud/bigtable/client.py | 8 ++++---- tests/unit/test_client.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 657a6f25f..05b07deda 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -132,11 +132,11 @@ def start_background_channel_refresh(self) -> None: # raise RuntimeError if there is no event loop asyncio.get_running_loop() for channel_idx in range(len(self.transport.channel_pool)): - refresh_task = asyncio.create_task( - self._manage_channel(channel_idx) - ) + refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) if sys.version_info >= (3, 8): - refresh_task.set_name(f"{self.__class__.__name__} channel refresh {channel_idx}") + refresh_task.set_name( + f"{self.__class__.__name__} channel refresh {channel_idx}" + ) self._channel_refresh_tasks.append(refresh_task) @property diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 99d9b5354..d99cc37a4 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -24,9 +24,9 @@ # try/except added for compatibility with python < 3.8 try: from unittest import mock - from unittest.mock import AsyncMock # pragma: NO COVER + from unittest.mock import AsyncMock # type: ignore except ImportError: # pragma: NO COVER - import mock + import mock # type: ignore VENEER_HEADER_REGEX = re.compile( r"gapic\/[0-9]+\.[\w.-]+ gax\/[0-9]+\.[\w.-]+ gccl\/[0-9]+\.[\w.-]+ gl-python\/[0-9]+\.[\w.-]+ grpc\/[0-9]+\.[\w.-]+" From 00be65a8d330d80f4efad87980ba378dd5c484b7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 29 Mar 2023 16:40:00 -0700 Subject: [PATCH 132/349] fixed broken test --- tests/unit/test_read_rows_query.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index 2f0261eba..822a75384 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -224,24 +224,24 @@ def test_add_range(self): query.add_range(start_is_inclusive=True, end_is_inclusive=True) self.assertEqual( exc.exception.args, - ("start_is_inclusive must not be set without start_key",), + ("start_is_inclusive must be set with start_key",), ) with self.assertRaises(ValueError) as exc: query.add_range(start_is_inclusive=False, end_is_inclusive=False) self.assertEqual( exc.exception.args, - ("start_is_inclusive must not be set without start_key",), + ("start_is_inclusive must be set with start_key",), ) with self.assertRaises(ValueError) as exc: query.add_range(start_is_inclusive=False) self.assertEqual( exc.exception.args, - ("start_is_inclusive must not be set without start_key",), + ("start_is_inclusive must be set with start_key",), ) with self.assertRaises(ValueError) as exc: query.add_range(end_is_inclusive=True) self.assertEqual( - exc.exception.args, ("end_is_inclusive must not be set without end_key",) + exc.exception.args, ("end_is_inclusive must be set with end_key",) ) # test with invalid keys with self.assertRaises(ValueError) as exc: From 8f15e9c2b80e9ff28ad622ed81c2609f950a2caf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 20:53:50 +0000 Subject: [PATCH 
133/349] Update docstring Co-authored-by: Mariatta Wijaya --- google/cloud/bigtable/read_rows_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index f16de7b14..24f85f622 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -24,7 +24,7 @@ @dataclass class _RangePoint: - # model class for a point in a row range + """Model class for a point in a row range""" key: row_key is_inclusive: bool From b9dc2f76ae9fa48b35a22dc8cd9e839a4bdcd3d6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 14:31:27 -0700 Subject: [PATCH 134/349] got 3.7 tests working --- google/cloud/bigtable/client.py | 6 +++++- tests/unit/test_client.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 05b07deda..40685ac6a 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -97,8 +97,12 @@ def __init__( client_options = cast( Optional[client_options_lib.ClientOptions], client_options ) + mixin_args = {"project": project, "credentials": credentials} + # support google-api-core <=1.5.0, which does not have credentials + if "credentials" not in _ClientProjectMixin.__init__.__code__.co_varnames: + mixin_args.pop("credentials") # initialize client - _ClientProjectMixin.__init__(self, project=project, credentials=credentials) + _ClientProjectMixin.__init__(self, **mixin_args) # raises RuntimeError if called outside of an async run loop context BigtableAsyncClient.__init__( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d99cc37a4..a57d4cad5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,6 +27,7 @@ from unittest.mock import AsyncMock # type: ignore except ImportError: # pragma: NO COVER import mock # type: ignore + from mock import AsyncMock # type: ignore VENEER_HEADER_REGEX = re.compile( r"gapic\/[0-9]+\.[\w.-]+ gax\/[0-9]+\.[\w.-]+ gccl\/[0-9]+\.[\w.-]+ gl-python\/[0-9]+\.[\w.-]+ grpc\/[0-9]+\.[\w.-]+" @@ -631,8 +632,10 @@ def test_client_ctor_sync(): with pytest.warns(RuntimeWarning) as warnings: client = BigtableDataClient(project="project-id") + expected_warning = [w for w in warnings if "client.py" in w.filename] + assert len(expected_warning) == 1 assert "BigtableDataClient should be started in an asyncio event loop." 
in str( - warnings[0].message + expected_warning[0].message ) assert client.project == "project-id" assert client._channel_refresh_tasks == [] From 19036d81dec1a1adf234f13f53f93002e167c0e5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 15:06:34 -0700 Subject: [PATCH 135/349] fixed style issue --- google/cloud/bigtable/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 40685ac6a..0ab43188e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -99,7 +99,7 @@ def __init__( ) mixin_args = {"project": project, "credentials": credentials} # support google-api-core <=1.5.0, which does not have credentials - if "credentials" not in _ClientProjectMixin.__init__.__code__.co_varnames: + if "credentials" not in _ClientProjectMixin.__init__.__code__.co_varnames: mixin_args.pop("credentials") # initialize client _ClientProjectMixin.__init__(self, **mixin_args) From 8873e9d4b7aed64a35c1e7b4056794949510c654 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 15:41:48 -0700 Subject: [PATCH 136/349] remvoed keys, values, items --- google/cloud/bigtable/row_response.py | 52 +++++++++++--------------- tests/unit/test_row_response.py | 54 ++++----------------------- 2 files changed, 28 insertions(+), 78 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index d08f3caea..c2c6f47de 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -136,25 +136,27 @@ def __str__(self) -> str: } """ output = ["{"] - for key in self.keys(): - if len(self[key]) == 0: - output.append(f" {key}: []") - elif len(self[key]) == 1: - output.append( - f" (family='{key[0]}', qualifier={key[1]}): [{self[key][0]}]," - ) + for family,qualifier in self.get_column_components(): + cell_list = self[family, qualifier] + line = [f" (family={family!r}, qualifier={qualifier!r}): "] + if len(cell_list) == 0: + line.append("[],") + elif len(cell_list) == 1: + line.append(f"[{cell_list[0]}],") else: - output.append( - f" (family='{key[0]}', qualifier={key[1]}): [{self[key][0]}, (+{len(self[key])-1} more)]," + line.append( + f"[{cell_list[0]}, (+{len(cell_list)-1} more)]," ) + output.append("".join(line)) output.append("}") return "\n".join(output) def __repr__(self): cell_str_buffer = ["{"] - for key, cell_list in self.items(): + for family,qualifier in self.get_column_components(): + cell_list = self[family, qualifier] repr_list = [cell.to_dict(use_nanoseconds=True) for cell in cell_list] - cell_str_buffer.append(f" ('{key[0]}', {key[1]}): {repr_list},") + cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") cell_str_buffer.append("}") cell_str = "\n".join(cell_str_buffer) output = f"RowResponse(key={self.row_key!r}, cells={cell_str})" @@ -255,7 +257,7 @@ def __len__(self): """ return len(self._cells_list) - def keys(self): + def get_column_components(self): """ Returns a list of (family, qualifier) pairs associated with the cells @@ -267,22 +269,7 @@ def keys(self): key_list.append((family, qualifier)) return key_list - def values(self): - """ - Returns the the cells in the row, broken into lists - corresponding to the family and qualifier - """ - result = [] - for key in self.keys(): - result.append(self[key]) - return result - def items(self): - """ - Iterates over (family, qualifier) pairs and the list of associated cells - """ - for key in self.keys(): - yield key, 
self[key] def __eq__(self, other): """ @@ -296,11 +283,14 @@ def __eq__(self, other): return False if len(self._cells_list) != len(other._cells_list): return False - keys, other_keys = self.keys(), other.keys() - if keys != other_keys: + components = self.get_column_components() + other_components = other.get_column_components() + if len(components) != len(other_components): + return False + if components != other_components: return False - for key in keys: - if len(self[key]) != len(other[key]): + for family,qualifier in components: + if len(self[family, qualifier]) != len(other[family,qualifier]): return False # compare individual cell lists if self._cells_list != other._cells_list: diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row_response.py index 638292d2d..48f4b9c46 100644 --- a/tests/unit/test_row_response.py +++ b/tests/unit/test_row_response.py @@ -445,7 +445,7 @@ def test_family_qualifier_indexing(self): with self.assertRaises(TypeError): row_response[b"new_family_id", b"new_qualifier"] - def test_keys(self): + def test_get_column_components(self): # should be able to retrieve (family,qualifier) tuples as keys new_family_id = "new_family_id" new_qualifier = b"new_qualifier" @@ -475,60 +475,20 @@ def test_keys(self): ) row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) - self.assertEqual(len(row_response.keys()), 2) + self.assertEqual(len(row_response.get_column_components()), 2) self.assertEqual( - row_response.keys(), + row_response.get_column_components(), [(TEST_FAMILY_ID, TEST_QUALIFIER), (new_family_id, new_qualifier)], ) row_response = self._make_one(TEST_ROW_KEY, []) - self.assertEqual(len(row_response.keys()), 0) - self.assertEqual(row_response.keys(), []) + self.assertEqual(len(row_response.get_column_components()), 0) + self.assertEqual(row_response.get_column_components(), []) row_response = self._make_one(TEST_ROW_KEY, [cell]) - self.assertEqual(len(row_response.keys()), 1) - self.assertEqual(row_response.keys(), [(TEST_FAMILY_ID, TEST_QUALIFIER)]) + self.assertEqual(len(row_response.get_column_components()), 1) + self.assertEqual(row_response.get_column_components(), [(TEST_FAMILY_ID, TEST_QUALIFIER)]) - def test_values(self): - # values should return the all cells, divided into lists - # according to (family,qualifier) pairs - cell_list = [self._make_cell(qualifier=str(i % 5).encode()) for i in range(10)] - row_response = self._make_one(TEST_ROW_KEY, cell_list) - sorted(cell_list) - - values = list(row_response.values()) - self.assertEqual(len(values), 5) - self.assertEqual(len(values[0]), 2) - - keys = list(row_response.keys()) - values = list(row_response.values()) - for i in range(len(keys)): - self.assertEqual(row_response[keys[i]], values[i]) - - def test_items(self): - cell_list = [self._make_cell() for i in range(10)] - sorted(cell_list) - row_response = self._make_one(TEST_ROW_KEY, cell_list) - - self.assertEqual(len(list(row_response.items())), 1) - self.assertEqual( - list(row_response.items())[0][0], (TEST_FAMILY_ID, TEST_QUALIFIER) - ) - self.assertEqual(list(row_response.items())[0][1], cell_list) - - row_response = self._make_one(TEST_ROW_KEY, []) - self.assertEqual(len(list(row_response.items())), 0) - - cell_list = [self._make_cell(qualifier=str(i).encode()) for i in range(10)] - row_response = self._make_one(TEST_ROW_KEY, cell_list) - sorted(cell_list) - self.assertEqual(len(list(row_response.items())), 10) - keys = [t[0] for t in row_response.items()] - cells = [t[1] for t in row_response.items()] - for i in 
range(10): - self.assertEqual(keys[i], (TEST_FAMILY_ID, str(i).encode())) - self.assertEqual(len(cells[i]), 1) - self.assertEqual(cells[i][0], cell_list[i]) def test_index_of(self): # given a cell, should find index in underlying list From ff7dcbba2583df223d146f2176643b0cb244ce28 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 15:55:02 -0700 Subject: [PATCH 137/349] removed nanosecond timestamps --- google/cloud/bigtable/row_response.py | 32 +++++++----------- tests/unit/test_row_response.py | 47 ++++++++++----------------- 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index c2c6f47de..d91667fe5 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -155,7 +155,7 @@ def __repr__(self): cell_str_buffer = ["{"] for family,qualifier in self.get_column_components(): cell_list = self[family, qualifier] - repr_list = [cell.to_dict(use_nanoseconds=True) for cell in cell_list] + repr_list = [cell.to_dict() for cell in cell_list] cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") cell_str_buffer.append("}") cell_str = "\n".join(cell_str_buffer) @@ -320,7 +320,7 @@ def __init__( row: row_key, family: family_id, column_qualifier: qualifier | str, - timestamp_ns: int, + timestamp_micros: int, labels: list[str] | None = None, ): """ @@ -335,7 +335,7 @@ def __init__( if isinstance(column_qualifier, str): column_qualifier = column_qualifier.encode() self.column_qualifier = column_qualifier - self.timestamp_ns = timestamp_ns + self.timestamp_micros = timestamp_micros self.labels = labels if labels is not None else [] @staticmethod @@ -348,19 +348,12 @@ def _from_dict( CellResponse objects are not intended to be constructed by users. They are returned by the Bigtable backend. 
""" - # Bigtable backend will use microseconds for timestamps, - # but the Python library prefers nanoseconds where possible - timestamp = cell_dict.get( - "timestamp_ns", cell_dict.get("timestamp_micros", -1) * 1000 - ) - if timestamp < 0: - raise ValueError("invalid timestamp") cell_obj = CellResponse( cell_dict["value"], row_key, family, qualifier, - timestamp, + cell_dict.get("timestamp_micros"), cell_dict.get("labels", None), ) return cell_obj @@ -373,7 +366,7 @@ def __int__(self) -> int: """ return int.from_bytes(self.value, byteorder="big", signed=True) - def to_dict(self, use_nanoseconds=False) -> dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Returns a dictionary representation of the cell in the Bigtable Cell proto format @@ -383,10 +376,7 @@ def to_dict(self, use_nanoseconds=False) -> dict[str, Any]: cell_dict: dict[str, Any] = { "value": self.value, } - if use_nanoseconds: - cell_dict["timestamp_ns"] = self.timestamp_ns - else: - cell_dict["timestamp_micros"] = self.timestamp_ns // 1000 + cell_dict["timestamp_micros"] = self.timestamp_micros if self.labels: cell_dict["labels"] = self.labels return cell_dict @@ -402,7 +392,7 @@ def __repr__(self): """ Returns a string representation of the cell """ - return f"CellResponse(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_ns={self.timestamp_ns}, labels={self.labels})" + return f"CellResponse(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" """For Bigtable native ordering""" @@ -415,14 +405,14 @@ def __lt__(self, other) -> bool: this_ordering = ( self.family, self.column_qualifier, - -self.timestamp_ns, + -self.timestamp_micros, self.value, self.labels, ) other_ordering = ( other.family, other.column_qualifier, - -other.timestamp_ns, + -other.timestamp_micros, other.value, other.labels, ) @@ -439,7 +429,7 @@ def __eq__(self, other) -> bool: and self.family == other.family and self.column_qualifier == other.column_qualifier and self.value == other.value - and self.timestamp_ns == other.timestamp_ns + and self.timestamp_micros == other.timestamp_micros and len(self.labels) == len(other.labels) and all([label in other.labels for label in self.labels]) ) @@ -460,7 +450,7 @@ def __hash__(self): self.family, self.column_qualifier, self.value, - self.timestamp_ns, + self.timestamp_micros, tuple(self.labels), ) ) diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row_response.py index 48f4b9c46..763751d34 100644 --- a/tests/unit/test_row_response.py +++ b/tests/unit/test_row_response.py @@ -20,7 +20,7 @@ TEST_ROW_KEY = b"row" TEST_FAMILY_ID = "cf1" TEST_QUALIFIER = b"col" -TEST_TIMESTAMP = time.time_ns() +TEST_TIMESTAMP = time.time_ns() // 1000 TEST_LABELS = ["label1", "label2"] @@ -59,7 +59,7 @@ def test_ctor_dict(self): cells = { (TEST_FAMILY_ID, TEST_QUALIFIER): [ self._make_cell().to_dict(), - self._make_cell().to_dict(use_nanoseconds=True), + self._make_cell().to_dict(), ] } row_response = self._make_one(TEST_ROW_KEY, cells) @@ -71,9 +71,8 @@ def test_ctor_dict(self): self.assertEqual(row_response[i].family, TEST_FAMILY_ID) self.assertEqual(row_response[i].column_qualifier, TEST_QUALIFIER) self.assertEqual(row_response[i].labels, TEST_LABELS) - self.assertEqual(row_response[0].timestamp_ns, TEST_TIMESTAMP) - # second cell was initialized with use_nanoseconds=False, so it doesn't have full precision - 
self.assertEqual(row_response[1].timestamp_ns, TEST_TIMESTAMP // 1000 * 1000) + self.assertEqual(row_response[0].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(row_response[1].timestamp_micros, TEST_TIMESTAMP) def test_ctor_bad_cell(self): cells = [self._make_cell(), self._make_cell()] @@ -128,7 +127,7 @@ def test__repr__(self): from google.cloud.bigtable.row_response import RowResponse cell_str = ( - "{'value': b'1234', 'timestamp_ns': %d, 'labels': ['label1', 'label2']}" + "{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}" % (TEST_TIMESTAMP) ) expected_prefix = "RowResponse(key=b'row', cells=" @@ -136,7 +135,7 @@ def test__repr__(self): self.assertIn(expected_prefix, repr(row)) self.assertIn(cell_str, repr(row)) expected_full = ( - "RowResponse(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_ns': %d, 'labels': ['label1', 'label2']}],\n})" + "RowResponse(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}],\n})" % (TEST_TIMESTAMP) ) self.assertEqual(expected_full, repr(row)) @@ -195,12 +194,12 @@ def test_to_dict(self): "cells": [ { "value": TEST_VALUE, - "timestamp_micros": TEST_TIMESTAMP // 1000, + "timestamp_micros": TEST_TIMESTAMP, "labels": TEST_LABELS, }, { "value": b"other", - "timestamp_micros": TEST_TIMESTAMP // 1000, + "timestamp_micros": TEST_TIMESTAMP, "labels": TEST_LABELS, }, ], @@ -223,10 +222,10 @@ def test_to_dict(self): self.assertEqual(column.qualifier, TEST_QUALIFIER) self.assertEqual(len(column.cells), 2) self.assertEqual(column.cells[0].value, TEST_VALUE) - self.assertEqual(column.cells[0].timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(column.cells[0].timestamp_micros, TEST_TIMESTAMP) self.assertEqual(column.cells[0].labels, TEST_LABELS) self.assertEqual(column.cells[1].value, cell2.value) - self.assertEqual(column.cells[1].timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(column.cells[1].timestamp_micros, TEST_TIMESTAMP) self.assertEqual(column.cells[1].labels, TEST_LABELS) def test_iteration(self): @@ -537,7 +536,7 @@ def test_ctor(self): self.assertEqual(cell.row_key, TEST_ROW_KEY) self.assertEqual(cell.family, TEST_FAMILY_ID) self.assertEqual(cell.column_qualifier, TEST_QUALIFIER) - self.assertEqual(cell.timestamp_ns, TEST_TIMESTAMP) + self.assertEqual(cell.timestamp_micros, TEST_TIMESTAMP) self.assertEqual(cell.labels, TEST_LABELS) def test_to_dict(self): @@ -547,7 +546,7 @@ def test_to_dict(self): cell_dict = cell.to_dict() expected_dict = { "value": TEST_VALUE, - "timestamp_micros": TEST_TIMESTAMP // 1000, + "timestamp_micros": TEST_TIMESTAMP, "labels": TEST_LABELS, } self.assertEqual(len(cell_dict), len(expected_dict)) @@ -556,21 +555,9 @@ def test_to_dict(self): # should be able to construct a Cell proto from the dict cell_proto = Cell(**cell_dict) self.assertEqual(cell_proto.value, TEST_VALUE) - self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP) self.assertEqual(cell_proto.labels, TEST_LABELS) - def test_to_dict_nanos_timestamp(self): - cell = self._make_one() - cell_dict = cell.to_dict(use_nanoseconds=True) - expected_dict = { - "value": TEST_VALUE, - "timestamp_ns": TEST_TIMESTAMP, - "labels": TEST_LABELS, - } - self.assertEqual(len(cell_dict), len(expected_dict)) - for key, value in expected_dict.items(): - self.assertEqual(cell_dict[key], value) - def test_to_dict_no_labels(self): from google.cloud.bigtable_v2.types import Cell @@ 
-585,7 +572,7 @@ def test_to_dict_no_labels(self): cell_dict = cell_no_labels.to_dict() expected_dict = { "value": TEST_VALUE, - "timestamp_micros": TEST_TIMESTAMP // 1000, + "timestamp_micros": TEST_TIMESTAMP, } self.assertEqual(len(cell_dict), len(expected_dict)) for key, value in expected_dict.items(): @@ -593,7 +580,7 @@ def test_to_dict_no_labels(self): # should be able to construct a Cell proto from the dict cell_proto = Cell(**cell_dict) self.assertEqual(cell_proto.value, TEST_VALUE) - self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP // 1000) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP) self.assertEqual(cell_proto.labels, []) def test_int_value(self): @@ -650,7 +637,7 @@ def test___repr__(self): expected = ( "CellResponse(value=b'1234', row=b'row', " + "family='cf1', column_qualifier=b'col', " - + f"timestamp_ns={TEST_TIMESTAMP}, labels=['label1', 'label2'])" + + f"timestamp_micros={TEST_TIMESTAMP}, labels=['label1', 'label2'])" ) self.assertEqual(repr(cell), expected) # should be able to construct instance from __repr__ @@ -671,7 +658,7 @@ def test___repr___no_labels(self): expected = ( "CellResponse(value=b'1234', row=b'row', " + "family='cf1', column_qualifier=b'col', " - + f"timestamp_ns={TEST_TIMESTAMP}, labels=[])" + + f"timestamp_micros={TEST_TIMESTAMP}, labels=[])" ) self.assertEqual(repr(cell_no_labels), expected) # should be able to construct instance from __repr__ From 39da24defe03c69da5b8c8956353ec7b60ff3dbb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 15:56:45 -0700 Subject: [PATCH 138/349] ran black --- google/cloud/bigtable/row_response.py | 14 +++++--------- tests/unit/test_row_response.py | 5 +++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index d91667fe5..82b0fdd47 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -136,7 +136,7 @@ def __str__(self) -> str: } """ output = ["{"] - for family,qualifier in self.get_column_components(): + for family, qualifier in self.get_column_components(): cell_list = self[family, qualifier] line = [f" (family={family!r}, qualifier={qualifier!r}): "] if len(cell_list) == 0: @@ -144,16 +144,14 @@ def __str__(self) -> str: elif len(cell_list) == 1: line.append(f"[{cell_list[0]}],") else: - line.append( - f"[{cell_list[0]}, (+{len(cell_list)-1} more)]," - ) + line.append(f"[{cell_list[0]}, (+{len(cell_list)-1} more)],") output.append("".join(line)) output.append("}") return "\n".join(output) def __repr__(self): cell_str_buffer = ["{"] - for family,qualifier in self.get_column_components(): + for family, qualifier in self.get_column_components(): cell_list = self[family, qualifier] repr_list = [cell.to_dict() for cell in cell_list] cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") @@ -269,8 +267,6 @@ def get_column_components(self): key_list.append((family, qualifier)) return key_list - - def __eq__(self, other): """ Implements `==` operator @@ -289,8 +285,8 @@ def __eq__(self, other): return False if components != other_components: return False - for family,qualifier in components: - if len(self[family, qualifier]) != len(other[family,qualifier]): + for family, qualifier in components: + if len(self[family, qualifier]) != len(other[family, qualifier]): return False # compare individual cell lists if self._cells_list != other._cells_list: diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row_response.py index 
763751d34..4eebfaa8a 100644 --- a/tests/unit/test_row_response.py +++ b/tests/unit/test_row_response.py @@ -486,8 +486,9 @@ def test_get_column_components(self): row_response = self._make_one(TEST_ROW_KEY, [cell]) self.assertEqual(len(row_response.get_column_components()), 1) - self.assertEqual(row_response.get_column_components(), [(TEST_FAMILY_ID, TEST_QUALIFIER)]) - + self.assertEqual( + row_response.get_column_components(), [(TEST_FAMILY_ID, TEST_QUALIFIER)] + ) def test_index_of(self): # given a cell, should find index in underlying list From 3a6fff11ea246266ad86129fe20d522476020e66 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 16:06:22 -0700 Subject: [PATCH 139/349] removed from_dict --- google/cloud/bigtable/row_response.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py index 82b0fdd47..8e60d1743 100644 --- a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row_response.py @@ -59,8 +59,8 @@ def __init__( tmp_list = [] for (family, qualifier), cell_list in cells.items(): for cell_dict in cell_list: - cell_obj = CellResponse._from_dict( - key, family, qualifier, cell_dict + cell_obj = CellResponse( + row=key, family=family, column_qualifier=qualifier, **cell_dict ) tmp_list.append(cell_obj) cells = tmp_list @@ -334,26 +334,6 @@ def __init__( self.timestamp_micros = timestamp_micros self.labels = labels if labels is not None else [] - @staticmethod - def _from_dict( - row_key: bytes, family: str, qualifier: bytes, cell_dict: dict[str, Any] - ) -> CellResponse: - """ - Helper function to create CellResponse from a dictionary - - CellResponse objects are not intended to be constructed by users. - They are returned by the Bigtable backend. 
- """ - cell_obj = CellResponse( - cell_dict["value"], - row_key, - family, - qualifier, - cell_dict.get("timestamp_micros"), - cell_dict.get("labels", None), - ) - return cell_obj - def __int__(self) -> int: """ Allows casting cell to int From 00a3d3e0f4142ffbbb5bc21697731228952c2628 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 17:09:54 -0700 Subject: [PATCH 140/349] got acceptance tests passing --- google/cloud/bigtable/row_merger.py | 13 ++++----- tests/unit/test_read_rows.py | 45 +++++++---------------------- 2 files changed, 17 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ac2e066ac..1381c3637 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -70,7 +70,7 @@ async def merge_row_stream( yield complete_row if not self.state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger - raise RuntimeError("read_rows completed with partial state remaining") + raise InvalidChunk("read_rows completed with partial state remaining") async def _generator_to_cache( self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] @@ -172,7 +172,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") - if self.last_seen_row_key and self.last_seen_row_key >= chunk.row_key: + if self.last_seen_row_key and chunk.row_key and self.last_seen_row_key >= chunk.row_key: raise InvalidChunk("Out of order row keys") if chunk.reset_row: # reset row if requested @@ -225,7 +225,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk has a value") self._reset_row() if not isinstance(self.current_state, AWAITING_NEW_ROW): - raise RuntimeError("Failed to reset state machine") + raise InvalidChunk("Failed to reset state machine") class State(ABC): @@ -287,7 +287,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # ensure that all chunks after the first one are either missing a row # key or the row is the same if ( - chunk.row_key is not None + chunk.row_key and chunk.row_key != self._owner.adapter.current_key ): raise InvalidChunk("row key changed mid row") @@ -391,10 +391,9 @@ def start_cell( raise InvalidChunk("missing qualifier for a new cell") if self.current_key is None: raise InvalidChunk("no row in progress") - self.working_value = bytearray(size) - timestamp_nanos = timestamp_micros * 1000 + self.working_value = bytearray() self.working_cell = CellResponse( - b"", self.current_key, family, qualifier, labels, timestamp_nanos + b"", self.current_key, family, qualifier, timestamp_micros, labels ) def cell_value(self, value: bytes) -> None: diff --git a/tests/unit/test_read_rows.py b/tests/unit/test_read_rows.py index 3276111df..aabb32f5a 100644 --- a/tests/unit/test_read_rows.py +++ b/tests/unit/test_read_rows.py @@ -9,30 +9,7 @@ from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk from google.cloud.bigtable.row_response import RowResponse - -# TODO: autogenerate protos from -# https://github.com/googleapis/conformance-tests/blob/main/bigtable/v2/proto/google/cloud/conformance/bigtable/v2/tests.proto -class ReadRowsTest(proto.Message): - class Result(proto.Message): - row_key = proto.Field(proto.STRING, number=1) - family_name = proto.Field(proto.STRING, number=2) - qualifier = proto.Field(proto.STRING, 
number=3) - timestamp_micros = proto.Field(proto.INT64, number=4) - value = proto.Field(proto.STRING, number=5) - label = proto.Field(proto.STRING, number=6) - error = proto.Field(proto.BOOL, number=7) - - description = proto.Field(proto.STRING, number=1) - chunks = proto.RepeatedField( - proto.MESSAGE, number=2, message=ReadRowsResponse.CellChunk - ) - results = proto.RepeatedField(proto.MESSAGE, number=3, message=Result) - - -class TestFile(proto.Message): - __test__ = False - read_rows_tests = proto.RepeatedField(proto.MESSAGE, number=1, message=ReadRowsTest) - +from .v2_client.test_row_merger import ReadRowsTest, TestFile def parse_readrows_acceptance_tests(): dirname = os.path.dirname(__file__) @@ -73,22 +50,22 @@ async def _scenerio_stream(): merger = RowMerger() results = [] async for row in merger.merge_row_stream(_scenerio_stream()): - results.append(row) + for cell in row: + cell_result = ReadRowsTest.Result( + row_key=cell.row_key, + family_name=cell.family, + qualifier=cell.column_qualifier, + timestamp_micros=cell.timestamp_micros, + value=cell.value, + label=cell.labels[0] if cell.labels else "", + ) + results.append(cell_result) if not merger.state_machine.is_terminal_state(): raise InvalidChunk("merger has partial frame after reading") except InvalidChunk: results.append(ReadRowsTest.Result(error=True)) for expected, actual in zip_longest(test_case.results, results): assert actual == expected - # def fake_read(*args, **kwargs): - # return iter([ReadRowsResponse(chunks=test_case.chunks)]) - # actual_results: List[ReadRowsTest.Result] = [] - # try: - # for row in PartialRowsData(fake_read, request=None): - # actual_results.extend(extract_results_from_row(row)) - # except (InvalidChunk, ValueError): - # actual_results.append(ReadRowsTest.Result(error=True)) - # breakpoint() @pytest.mark.asyncio From 393749f6cfbb21424492b57cdcbe9e1ad7647ef6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 17:11:25 -0700 Subject: [PATCH 141/349] removed type conversion --- google/cloud/bigtable/row_merger.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 1381c3637..db4f8958b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -56,9 +56,6 @@ async def merge_row_stream( Consume chunks from a ReadRowsResponse stream into a set of Rows """ async for row_response in request_generator: - # ensure that the response is a ReadRowsResponse - if not isinstance(row_response, ReadRowsResponse): - row_response = ReadRowsResponse(row_response) last_scanned = row_response.last_scanned_row_key # if the server sends a scan heartbeat, notify the state machine. 
if last_scanned: From 2a4221642d30054bf3a36d8b72daf75ccf531570 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 17:12:08 -0700 Subject: [PATCH 142/349] renamed acceptance test file --- tests/unit/{test_read_rows.py => test_read_rows_acceptance.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/unit/{test_read_rows.py => test_read_rows_acceptance.py} (100%) diff --git a/tests/unit/test_read_rows.py b/tests/unit/test_read_rows_acceptance.py similarity index 100% rename from tests/unit/test_read_rows.py rename to tests/unit/test_read_rows_acceptance.py From 536e58760e107ef730e9c0fe2a546e54aa5aafb5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 30 Mar 2023 17:13:46 -0700 Subject: [PATCH 143/349] ran blacken --- google/cloud/bigtable/row_merger.py | 11 ++++++----- tests/unit/test_read_rows_acceptance.py | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index db4f8958b..15557d1c6 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -169,7 +169,11 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") - if self.last_seen_row_key and chunk.row_key and self.last_seen_row_key >= chunk.row_key: + if ( + self.last_seen_row_key + and chunk.row_key + and self.last_seen_row_key >= chunk.row_key + ): raise InvalidChunk("Out of order row keys") if chunk.reset_row: # reset row if requested @@ -283,10 +287,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # ensure that all chunks after the first one are either missing a row # key or the row is the same - if ( - chunk.row_key - and chunk.row_key != self._owner.adapter.current_key - ): + if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: raise InvalidChunk("row key changed mid row") self._owner.adapter.start_cell( diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index aabb32f5a..2e3f09f48 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -11,6 +11,7 @@ from .v2_client.test_row_merger import ReadRowsTest, TestFile + def parse_readrows_acceptance_tests(): dirname = os.path.dirname(__file__) filename = os.path.join(dirname, "./read-rows-acceptance-test.json") From 4e262d1d66a6dc6a2b7f9c8743603ab79f5c1c09 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 11:36:07 -0700 Subject: [PATCH 144/349] unwrap proto-plus object --- google/cloud/bigtable/row_merger.py | 25 +++++++++++++------------ tests/unit/test_read_rows_acceptance.py | 1 - 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 15557d1c6..c35187ab3 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -56,12 +56,14 @@ async def merge_row_stream( Consume chunks from a ReadRowsResponse stream into a set of Rows """ async for row_response in request_generator: - last_scanned = row_response.last_scanned_row_key + # unwrap protoplus object for increased performance + response_pb = row_response._pb + last_scanned = response_pb.last_scanned_row_key # if the server sends a scan heartbeat, notify the state machine. 
if last_scanned: yield self.state_machine.handle_last_scanned_row(last_scanned) # process new chunks through the state machine. - for chunk in row_response.chunks: + for chunk in response_pb.chunks: complete_row = self.state_machine.handle_chunk(chunk) if complete_row is not None: yield complete_row @@ -214,9 +216,9 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("reset chunk received when not processing row") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") - if "family_name" in chunk: + if chunk.family_name.value: raise InvalidChunk("Reset chunk has family_name") - if "qualifier" in chunk: + if chunk.qualifier.value: raise InvalidChunk("Reset chunk has qualifier") if chunk.timestamp_micros: raise InvalidChunk("Reset chunk has a timestamp") @@ -276,12 +278,12 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": is_split = chunk.value_size > 0 expected_cell_size = chunk.value_size if is_split else chunk_size # track latest cell data. New chunks won't send repeated data - if chunk.family_name: - self._owner.current_family = chunk.family_name - if not chunk.qualifier: + if chunk.family_name.value: + self._owner.current_family = chunk.family_name.value + if not chunk.qualifier.value: raise InvalidChunk("new column family must specify qualifier") - if chunk.qualifier: - self._owner.current_qualifier = chunk.qualifier + if chunk.qualifier.value: + self._owner.current_qualifier = chunk.qualifier.value if self._owner.current_family is None: raise InvalidChunk("family not found") @@ -289,7 +291,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # key or the row is the same if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: raise InvalidChunk("row key changed mid row") - self._owner.adapter.start_cell( family=self._owner.current_family, qualifier=self._owner.current_qualifier, @@ -320,9 +321,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("found row key mid cell") - if "family_name" in chunk: + if chunk.family_name.value: raise InvalidChunk("In progress cell had a family name") - if "qualifier" in chunk: + if chunk.qualifier.value: raise InvalidChunk("In progress cell had a qualifier") if chunk.timestamp_micros: raise InvalidChunk("In progress cell had a timestamp") diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 2e3f09f48..25a288150 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -1,7 +1,6 @@ import os from itertools import zip_longest -import proto import pytest from google.cloud.bigtable_v2 import ReadRowsResponse From fac018e70357f2f217bf63749535f23671724c86 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 11:48:26 -0700 Subject: [PATCH 145/349] added test skeleton --- tests/unit/test_row_merger.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tests/unit/test_row_merger.py diff --git a/tests/unit/test_row_merger.py b/tests/unit/test_row_merger.py new file mode 100644 index 000000000..9bd44dacc --- /dev/null +++ b/tests/unit/test_row_merger.py @@ -0,0 +1,36 @@ +import unittest + +class TestRowMerger(unittest.IsolatedAsyncioTestCase): + @staticmethod + def _get_target_class(): + from gspread_asyncio.row_merger import RowMerger + return RowMerger + + def _make_one(self, *args, **kwargs): + return 
self._get_target_class()(*args, **kwargs) + +class TestStateMachine(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.row_merger import StateMachine + return StateMachine + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + +class TestState(unittest.TestCase): + pass + +class TestRowBuilder(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.row_merger import RowBuilder + return RowBuilder + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + From 9800a78a46d8fd434b73d7f927fa80f62245318e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 15:11:40 -0700 Subject: [PATCH 146/349] working on tests --- google/cloud/bigtable/row_merger.py | 27 ++++++-------- tests/unit/test_row_merger.py | 57 +++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index c35187ab3..b8c01cb81 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -213,7 +213,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): """ # ensure reset chunk matches expectations if isinstance(self.current_state, AWAITING_NEW_ROW): - raise InvalidChunk("reset chunk received when not processing row") + raise InvalidChunk("Reset chunk received when not processing row") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") if chunk.family_name.value: @@ -274,29 +274,27 @@ class AWAITING_NEW_CELL(State): """ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": - chunk_size = len(chunk.value) is_split = chunk.value_size > 0 - expected_cell_size = chunk.value_size if is_split else chunk_size + # expected_cell_size = chunk.value_size if is_split else len(chunk.value) # track latest cell data. 
New chunks won't send repeated data if chunk.family_name.value: self._owner.current_family = chunk.family_name.value if not chunk.qualifier.value: - raise InvalidChunk("new column family must specify qualifier") + raise InvalidChunk("New column family must specify qualifier") if chunk.qualifier.value: self._owner.current_qualifier = chunk.qualifier.value if self._owner.current_family is None: - raise InvalidChunk("family not found") + raise InvalidChunk("Family not found") # ensure that all chunks after the first one are either missing a row # key or the row is the same if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: - raise InvalidChunk("row key changed mid row") + raise InvalidChunk("Row key changed mid row") self._owner.adapter.start_cell( family=self._owner.current_family, qualifier=self._owner.current_qualifier, labels=list(chunk.labels), timestamp_micros=chunk.timestamp_micros, - size=expected_cell_size, ) self._owner.adapter.cell_value(chunk.value) # transition to new state @@ -320,7 +318,7 @@ class AWAITING_CELL_VALUE(State): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": # ensure reset chunk matches expectations if chunk.row_key: - raise InvalidChunk("found row key mid cell") + raise InvalidChunk("Found row key mid cell") if chunk.family_name.value: raise InvalidChunk("In progress cell had a family name") if chunk.qualifier.value: @@ -381,15 +379,14 @@ def start_cell( qualifier: bytes | None, timestamp_micros: int, labels: List[str], - size: int, ) -> None: """called to start a new cell in a row.""" if not family: - raise InvalidChunk("missing family for a new cell") + raise InvalidChunk("Missing family for a new cell") if qualifier is None: - raise InvalidChunk("missing qualifier for a new cell") + raise InvalidChunk("Missing qualifier for a new cell") if self.current_key is None: - raise InvalidChunk("no row in progress") + raise InvalidChunk("start_cell called without a row") self.working_value = bytearray() self.working_cell = CellResponse( b"", self.current_key, family, qualifier, timestamp_micros, labels @@ -398,13 +395,13 @@ def start_cell( def cell_value(self, value: bytes) -> None: """called multiple times per cell to concatenate the cell value""" if self.working_value is None: - raise InvalidChunk("cell value received before start_cell") + raise InvalidChunk("Cell value received before start_cell") self.working_value.extend(value) def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" if self.working_cell is None or self.working_value is None: - raise InvalidChunk("cell value received before start_cell") + raise InvalidChunk("Cell value received before start_cell") self.working_cell.value = bytes(self.working_value) self.completed_cells.append(self.working_cell) self.working_cell = None @@ -413,7 +410,7 @@ def finish_cell(self) -> None: def finish_row(self) -> RowResponse: """called once per row to signal that all cells have been processed (unless reset)""" if self.current_key is None: - raise InvalidChunk("no row in progress") + raise InvalidChunk("No row in progress") new_row = RowResponse(self.current_key, self.completed_cells) self.reset() return new_row diff --git a/tests/unit/test_row_merger.py b/tests/unit/test_row_merger.py index 9bd44dacc..bd6e5d3d0 100644 --- a/tests/unit/test_row_merger.py +++ b/tests/unit/test_row_merger.py @@ -1,9 +1,17 @@ import unittest +from unittest import mock + +from google.cloud.bigtable.row_merger import InvalidChunk + +TEST_FAMILY = 
'family_name' +TEST_QUALIFIER = b'column_qualifier' +TEST_TIMESTAMP = 123456789 +TEST_LABELS = ['label1', 'label2'] class TestRowMerger(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): - from gspread_asyncio.row_merger import RowMerger + from google.cloud.bigtable.row_merger import RowMerger return RowMerger def _make_one(self, *args, **kwargs): @@ -13,7 +21,7 @@ class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.row_merger import StateMachine + from google.cloud.bigtable.row_merger import StateMachine return StateMachine def _make_one(self, *args, **kwargs): @@ -27,10 +35,53 @@ class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.row_merger import RowBuilder + from google.cloud.bigtable.row_merger import RowBuilder return RowBuilder def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + def test_ctor(self): + with mock.patch('google.cloud.bigtable.row_merger.RowBuilder.reset') as reset: + self._make_one() + reset.assert_called_once() + row_builder = self._make_one() + self.assertIsNone(row_builder.current_key) + self.assertIsNone(row_builder.working_cell) + self.assertIsNone(row_builder.working_value) + self.assertEqual(row_builder.completed_cells, []) + + def test_start_row(self): + pass + + def test_start_cell(self): + # test with no family + with self.assertRaises(InvalidChunk): + self._make_one().start_cell('', + # test with no row + with self.assertRaises(InvalidChunk) as e: + row_builder = self._make_one() + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + self.assertEqual(e.exception.message, 'start_cell called without a row') + + def test_cell_value_no_cell(self): + pass + + def test_cell_value(self): + pass + + def test_finish_cell(self): + pass + + def test_finish_cell_no_cell(self): + pass + + def test_finish_row(self): + pass + + def finish_row_no_row(self): + pass + + def test_reset(self): + pass From 8a22d150b3b089ae6c007eeb248d027bfbc411dc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 16:29:47 -0700 Subject: [PATCH 147/349] implement pool as custom grpc channel --- .../transports/pooled_grpc_asyncio.py | 181 +++++++++++++----- 1 file changed, 129 insertions(+), 52 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 834674108..d765d0952 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -16,6 +16,7 @@ import asyncio import warnings from functools import partialmethod +from functools import partial from typing import ( Awaitable, Callable, @@ -40,6 +41,114 @@ from .base import BigtableTransport, DEFAULT_CLIENT_INFO from .grpc import BigtableGrpcTransport +class PooledMultiCallable(): + + def __init__(self, channel_pool:"PooledChannel", *args, **kwargs): + self._next_idx = 0 + self._channel_pool = channel_pool + self._init_args = args + self._init_kwargs = kwargs + +class PooledUnaryUnaryMultiCallable(PooledMultiCallable, aio.UnaryUnaryMultiCallable): + def __call__(self, *args, **kwargs) -> aio.UnaryUnaryCall: + next_channel = self._channel_pool._pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) + return next_channel.unary_unary(*self._init_args, 
**self._init_args)(*args, **kwargs) + +class PooledUnaryStreamMultiCallable(PooledMultiCallable, aio.UnaryStreamMultiCallable): + def __call__(self, *args, **kwargs) -> aio.UnaryStreamCall: + next_channel = self._channel_pool._pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) + return next_channel.unary_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + +class PooledStreamUnaryMultiCallable(PooledMultiCallable, aio.StreamUnaryMultiCallable): + def __call__(self, *args, **kwargs) -> aio.StreamUnaryCall: + next_channel = self._channel_pool._pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) + return next_channel.stream_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) + +class PooledStreamStreamMultiCallable(PooledMultiCallable, aio.StreamStreamMultiCallable): + def __call__(self, *args, **kwargs) -> aio.StreamStreamCall: + next_channel = self._channel_pool._pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) + return next_channel.stream_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + +class PooledChannel(aio.Channel): + + def __init__( + self, + pool_size: int = 3, + host: str = "bigtable.googleapis.com", + credentials: Optional[ga_credentials.Credentials] = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ): + self._pool : List[aio.Channel] = [] + self._create_channel = partial(grpc_helpers_async.create_channel, target=host, credentials=credentials, credentials_file=credentials_file, scopes=scopes, quota_project_id=quota_project_id, **kwargs) + for i in range(pool_size): + self._pool.append(self._create_channel()) + + def unary_unary(self, *args, **kwargs) -> grpc.aio.UnaryUnaryMultiCallable: + return PooledUnaryUnaryMultiCallable(self, *args, **kwargs) + + def unary_stream(self, *args, **kwargs) -> grpc.aio.UnaryStreamMultiCallable: + return PooledUnaryStreamMultiCallable(self, *args, **kwargs) + + def stream_unary(self, *args, **kwargs) -> grpc.aio.StreamUnaryMultiCallable: + return PooledStreamUnaryMultiCallable(self, *args, **kwargs) + + def stream_stream(self, *args, **kwargs) -> grpc.aio.StreamStreamMultiCallable: + return PooledStreamStreamMultiCallable(self, *args, **kwargs) + + async def close(self, grace=None): + close_fns = [channel.close(grace=grace) for channel in self.channel_pool] + return asyncio.gather(*close_fns) + + async def channel_ready(self): + ready_fns = [channel.channel_ready() for channel in self.channel_pool] + return asyncio.gather(*ready_fns) + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + def get_state(self, try_to_connect: bool = False) -> grpc.ChannelConnectivity: + raise NotImplementedError() + + async def wait_for_state_change(self, last_observed_state): + raise NotImplementedError() + + async def replace_channel( + self, channel_idx, grace=None, new_channel=None + ) -> aio.Channel: + """ + Replaces a channel in the pool with a fresh one. + + The `new_channel` will start processing new requests immidiately, + but the old channel will continue serving existing clients for `grace` seconds + + Args: + channel_idx(int): the channel index in the pool to replace + grace(Optional[float]): The time to wait until all active RPCs are + finished. 
If a grace period is not specified (by passing None for + grace), all existing RPCs are cancelled immediately. + new_channel(grpc.aio.Channel): a new channel to insert into the pool + at `channel_idx`. If `None`, a new channel will be created. + """ + if channel_idx >= len(self.channel_pool) or channel_idx < 0: + raise ValueError( + f"invalid channel_idx {channel_idx} for pool size {len(self.channel_pool)}" + ) + if new_channel is None: + new_channel = self._create_channel() + old_channel = self._pool[channel_idx] + self._pool[channel_idx] = new_channel + await old_channel.close(grace=grace) + return new_channel class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): """Pooled gRPC AsyncIO backend transport for Bigtable. @@ -106,14 +215,13 @@ def create_channel( aio.Channel: A gRPC AsyncIO channel object. """ - return grpc_helpers_async.create_channel( + return PooledChannel( + 3, host, credentials=credentials, credentials_file=credentials_file, - quota_project_id=quota_project_id, - default_scopes=cls.AUTH_SCOPES, scopes=scopes, - default_host=cls.DEFAULT_HOST, + quota_project_id=quota_project_id, **kwargs, ) @@ -226,9 +334,7 @@ def __init__( api_audience=api_audience, ) self._quota_project_id = quota_project_id - self.channel_pool: List[aio.Channel] = [] - for i in range(pool_size): - new_channel = type(self).create_channel( + self._grpc_channel = type(self).create_channel( self._host, # use the credentials which are saved credentials=self._credentials, @@ -243,17 +349,17 @@ def __init__( ("grpc.max_receive_message_length", -1), ], ) - self.channel_pool.append(new_channel) - # Wrap messages. This must be done after self.channel_pool is populated self._prep_wrapped_messages(client_info) - def next_channel(self) -> aio.Channel: - """Returns the next channel in the round robin pool.""" + @property + def grpc_channel(self) -> aio.Channel: + """Create the channel designed to connect to this service. + This property caches on the instance; repeated calls return + the same channel. + """ # Return the channel from cache. - channel = self.channel_pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self.channel_pool) - return channel + return self._grpc_channel async def replace_channel( self, channel_idx, grace=None, new_channel=None @@ -272,43 +378,19 @@ async def replace_channel( new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. """ - if channel_idx >= len(self.channel_pool) or channel_idx < 0: - raise ValueError( - f"invalid channel_idx {channel_idx} for pool size {len(self.channel_pool)}" - ) - if new_channel is None: - new_channel = self.create_channel( - self._host, - credentials=self._credentials, - credentials_file=None, - scopes=self._scopes, - ssl_credentials=self._ssl_channel_credentials, - quota_project_id=self._quota_project_id, - options=[ - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ], - ) - old_channel = self.channel_pool[channel_idx] - self.channel_pool[channel_idx] = new_channel - await old_channel.close(grace=grace) - # invalidate stubs - stub_keys = list(self._stubs.keys()) - for stub_channel, stub_func in stub_keys: - if stub_channel == old_channel: - del self._stubs[(stub_channel, stub_func)] - return new_channel - - def read_rows(self, *args, **kwargs) -> Awaitable[bigtable.ReadRowsResponse]: - r"""Function for calling the read rows method over gRPC. 
+ return await self._grpc_channel.replace_channel(channel_idx, grace, new_channel) + @property + def read_rows( + self, + ) -> Callable[[bigtable.ReadRowsRequest], Awaitable[bigtable.ReadRowsResponse]]: + r"""Return a callable for the read rows method over gRPC. Streams back the contents of all requested rows in key order, optionally applying the same Reader filter to each. Depending on their size, rows and cells may be broken up across multiple responses, but atomicity of each row will still be preserved. See the ReadRowsResponse documentation for details. - Returns: Callable[[~.ReadRowsRequest], Awaitable[~.ReadRowsResponse]]: @@ -319,18 +401,13 @@ def read_rows(self, *args, **kwargs) -> Awaitable[bigtable.ReadRowsResponse]: # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "read_rows") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_stream( + if "read_rows" not in self._stubs: + self._stubs["read_rows"] = self.grpc_channel.unary_stream( "/google.bigtable.v2.Bigtable/ReadRows", request_serializer=bigtable.ReadRowsRequest.serialize, response_deserializer=bigtable.ReadRowsResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["read_rows"] def sample_row_keys( self, *args, **kwargs From 38e5662b2e4febe780e00a53b2a58ebed8f5baaa Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 17:03:30 -0700 Subject: [PATCH 148/349] did some restructuring --- .../transports/pooled_grpc_asyncio.py | 223 ++++++++---------- tests/unit/gapic/bigtable_v2/test_bigtable.py | 72 +++--- 2 files changed, 136 insertions(+), 159 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index d765d0952..fa29190cc 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -44,34 +44,25 @@ class PooledMultiCallable(): def __init__(self, channel_pool:"PooledChannel", *args, **kwargs): - self._next_idx = 0 - self._channel_pool = channel_pool self._init_args = args self._init_kwargs = kwargs + self.next_channel_fn = channel_pool.next_channel class PooledUnaryUnaryMultiCallable(PooledMultiCallable, aio.UnaryUnaryMultiCallable): def __call__(self, *args, **kwargs) -> aio.UnaryUnaryCall: - next_channel = self._channel_pool._pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) - return next_channel.unary_unary(*self._init_args, **self._init_args)(*args, **kwargs) + return self.next_channel_fn().unary_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) class PooledUnaryStreamMultiCallable(PooledMultiCallable, aio.UnaryStreamMultiCallable): def __call__(self, *args, **kwargs) -> aio.UnaryStreamCall: - next_channel = self._channel_pool._pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) - return next_channel.unary_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().unary_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) class PooledStreamUnaryMultiCallable(PooledMultiCallable, aio.StreamUnaryMultiCallable): def __call__(self, *args, **kwargs) -> aio.StreamUnaryCall: - 
next_channel = self._channel_pool._pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) - return next_channel.stream_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().stream_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) class PooledStreamStreamMultiCallable(PooledMultiCallable, aio.StreamStreamMultiCallable): def __call__(self, *args, **kwargs) -> aio.StreamStreamCall: - next_channel = self._channel_pool._pool[self._next_idx] - self._next_idx = (self._next_idx + 1) % len(self._callable_pool._pool) - return next_channel.stream_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().stream_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) class PooledChannel(aio.Channel): @@ -86,10 +77,16 @@ def __init__( **kwargs, ): self._pool : List[aio.Channel] = [] + self._next_idx = 0 self._create_channel = partial(grpc_helpers_async.create_channel, target=host, credentials=credentials, credentials_file=credentials_file, scopes=scopes, quota_project_id=quota_project_id, **kwargs) for i in range(pool_size): self._pool.append(self._create_channel()) + def next_channel(self) -> aio.Channel: + channel = self._pool[self._next_idx] + self._next_idx = (self._next_idx + 1) % len(self._pool) + return channel + def unary_unary(self, *args, **kwargs) -> grpc.aio.UnaryUnaryMultiCallable: return PooledUnaryUnaryMultiCallable(self, *args, **kwargs) @@ -103,11 +100,11 @@ def stream_stream(self, *args, **kwargs) -> grpc.aio.StreamStreamMultiCallable: return PooledStreamStreamMultiCallable(self, *args, **kwargs) async def close(self, grace=None): - close_fns = [channel.close(grace=grace) for channel in self.channel_pool] + close_fns = [channel.close(grace=grace) for channel in self._pool] return asyncio.gather(*close_fns) async def channel_ready(self): - ready_fns = [channel.channel_ready() for channel in self.channel_pool] + ready_fns = [channel.channel_ready() for channel in self._pool] return asyncio.gather(*ready_fns) async def __aenter__(self): @@ -139,9 +136,9 @@ async def replace_channel( new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. """ - if channel_idx >= len(self.channel_pool) or channel_idx < 0: + if channel_idx >= len(self._pool) or channel_idx < 0: raise ValueError( - f"invalid channel_idx {channel_idx} for pool size {len(self.channel_pool)}" + f"invalid channel_idx {channel_idx} for pool size {len(self._pool)}" ) if new_channel is None: new_channel = self._create_channel() @@ -186,6 +183,7 @@ class PooledTransportFixed(cls): @classmethod def create_channel( cls, + pool_size: int = 3, host: str = "bigtable.googleapis.com", credentials: Optional[ga_credentials.Credentials] = None, credentials_file: Optional[str] = None, @@ -216,7 +214,7 @@ def create_channel( """ return PooledChannel( - 3, + pool_size, host, credentials=credentials, credentials_file=credentials_file, @@ -335,6 +333,7 @@ def __init__( ) self._quota_project_id = quota_project_id self._grpc_channel = type(self).create_channel( + pool_size, self._host, # use the credentials which are saved credentials=self._credentials, @@ -349,7 +348,7 @@ def __init__( ("grpc.max_receive_message_length", -1), ], ) - # Wrap messages. This must be done after self.channel_pool is populated + # Wrap messages. 
This must be done after pool is populated self._prep_wrapped_messages(client_info) @property @@ -409,17 +408,19 @@ def read_rows( ) return self._stubs["read_rows"] - def sample_row_keys( - self, *args, **kwargs - ) -> Awaitable[bigtable.SampleRowKeysResponse]: - r"""Function for calling the sample row keys method over gRPC. + @property + def sample_row_keys( + self, + ) -> Callable[ + [bigtable.SampleRowKeysRequest], Awaitable[bigtable.SampleRowKeysResponse] + ]: + """Return a callable for the sample row keys method over gRPC. Returns a sample of row keys in the table. The returned row keys will delimit contiguous sections of the table of approximately equal size, which can be used to break up the data for distributed tasks like mapreduces. - Returns: Callable[[~.SampleRowKeysRequest], Awaitable[~.SampleRowKeysResponse]]: @@ -430,25 +431,21 @@ def sample_row_keys( # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "sample_row_keys") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_stream( + if "sample_row_keys" not in self._stubs: + self._stubs["sample_row_keys"] = self.grpc_channel.unary_stream( "/google.bigtable.v2.Bigtable/SampleRowKeys", request_serializer=bigtable.SampleRowKeysRequest.serialize, response_deserializer=bigtable.SampleRowKeysResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) - - def mutate_row(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowResponse]: - r"""Function for calling the mutate row method over gRPC. + return self._stubs["sample_row_keys"] + @property + def mutate_row( + self, + ) -> Callable[[bigtable.MutateRowRequest], Awaitable[bigtable.MutateRowResponse]]: + r"""Return a callable for the mutate row method over gRPC. Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. - Returns: Callable[[~.MutateRowRequest], Awaitable[~.MutateRowResponse]]: @@ -459,26 +456,22 @@ def mutate_row(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowResponse]: # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "mutate_row") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_unary( + if "mutate_row" not in self._stubs: + self._stubs["mutate_row"] = self.grpc_channel.unary_unary( "/google.bigtable.v2.Bigtable/MutateRow", request_serializer=bigtable.MutateRowRequest.serialize, response_deserializer=bigtable.MutateRowResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) - - def mutate_rows(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowsResponse]: - r"""Function for calling the mutate rows method over gRPC. + return self._stubs["mutate_row"] + @property + def mutate_rows( + self, + ) -> Callable[[bigtable.MutateRowsRequest], Awaitable[bigtable.MutateRowsResponse]]: + """Return a callable for the mutate rows method over gRPC. Mutates multiple rows in a batch. Each individual row is mutated atomically as in MutateRow, but the entire batch is not executed atomically. 
- Returns: Callable[[~.MutateRowsRequest], Awaitable[~.MutateRowsResponse]]: @@ -489,27 +482,24 @@ def mutate_rows(self, *args, **kwargs) -> Awaitable[bigtable.MutateRowsResponse] # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "mutate_rows") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_stream( + if "mutate_rows" not in self._stubs: + self._stubs["mutate_rows"] = self.grpc_channel.unary_stream( "/google.bigtable.v2.Bigtable/MutateRows", request_serializer=bigtable.MutateRowsRequest.serialize, response_deserializer=bigtable.MutateRowsResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["mutate_rows"] + @property def check_and_mutate_row( - self, *args, **kwargs - ) -> Awaitable[bigtable.CheckAndMutateRowResponse]: - r"""Function for calling the check and mutate row method over gRPC. - + self, + ) -> Callable[ + [bigtable.CheckAndMutateRowRequest], + Awaitable[bigtable.CheckAndMutateRowResponse], + ]: + """Return a callable for the check and mutate row method over gRPC. Mutates a row atomically based on the output of a predicate Reader filter. - Returns: Callable[[~.CheckAndMutateRowRequest], Awaitable[~.CheckAndMutateRowResponse]]: @@ -520,26 +510,24 @@ def check_and_mutate_row( # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "check_and_mutate_row") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_unary( + if "check_and_mutate_row" not in self._stubs: + self._stubs["check_and_mutate_row"] = self.grpc_channel.unary_unary( "/google.bigtable.v2.Bigtable/CheckAndMutateRow", request_serializer=bigtable.CheckAndMutateRowRequest.serialize, response_deserializer=bigtable.CheckAndMutateRowResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) - - def ping_and_warm(self, *args, **kwargs) -> Awaitable[bigtable.PingAndWarmResponse]: - r"""Function for calling the ping and warm method over gRPC. + return self._stubs["check_and_mutate_row"] + @property + def ping_and_warm( + self, + ) -> Callable[ + [bigtable.PingAndWarmRequest], Awaitable[bigtable.PingAndWarmResponse] + ]: + """Return a callable for the ping and warm method over gRPC. Warm up associated instance metadata for this connection. This call is not required but may be useful for connection keep-alive. - Returns: Callable[[~.PingAndWarmRequest], Awaitable[~.PingAndWarmResponse]]: @@ -550,24 +538,22 @@ def ping_and_warm(self, *args, **kwargs) -> Awaitable[bigtable.PingAndWarmRespon # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. 
- next_channel = self.next_channel() - stub_key = (next_channel, "ping_and_warm") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_unary( + if "ping_and_warm" not in self._stubs: + self._stubs["ping_and_warm"] = self.grpc_channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarm", request_serializer=bigtable.PingAndWarmRequest.serialize, response_deserializer=bigtable.PingAndWarmResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["ping_and_warm"] + @property def read_modify_write_row( - self, *args, **kwargs - ) -> Awaitable[bigtable.ReadModifyWriteRowResponse]: - r"""Function for calling the read modify write row method over gRPC. - + self, + ) -> Callable[ + [bigtable.ReadModifyWriteRowRequest], + Awaitable[bigtable.ReadModifyWriteRowResponse], + ]: + """Return a callable for the read modify write row method over gRPC. Modifies a row atomically on the server. The method reads the latest existing timestamp and value from the specified columns and writes a new entry based on @@ -575,7 +561,6 @@ def read_modify_write_row( the timestamp is the greater of the existing timestamp or the current server time. The method returns the new contents of all modified cells. - Returns: Callable[[~.ReadModifyWriteRowRequest], Awaitable[~.ReadModifyWriteRowResponse]]: @@ -586,30 +571,28 @@ def read_modify_write_row( # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "read_modify_write_row") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_unary( + if "read_modify_write_row" not in self._stubs: + self._stubs["read_modify_write_row"] = self.grpc_channel.unary_unary( "/google.bigtable.v2.Bigtable/ReadModifyWriteRow", request_serializer=bigtable.ReadModifyWriteRowRequest.serialize, response_deserializer=bigtable.ReadModifyWriteRowResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["read_modify_write_row"] + + @property def generate_initial_change_stream_partitions( - self, *args, **kwargs - ) -> Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse]: - r"""Function for calling the generate initial change stream + self, + ) -> Callable[ + [bigtable.GenerateInitialChangeStreamPartitionsRequest], + Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], + ]: + r"""Return a callable for the generate initial change stream partitions method over gRPC. - NOTE: This API is intended to be used by Apache Beam BigtableIO. Returns the current list of partitions that make up the table's change stream. The union of partitions will cover the entire keyspace. Partitions can be read with ``ReadChangeStream``. - Returns: Callable[[~.GenerateInitialChangeStreamPartitionsRequest], Awaitable[~.GenerateInitialChangeStreamPartitionsResponse]]: @@ -620,29 +603,27 @@ def generate_initial_change_stream_partitions( # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. 
- next_channel = self.next_channel() - stub_key = (next_channel, "generate_initial_change_stream_partitions") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_stream( + if "generate_initial_change_stream_partitions" not in self._stubs: + self._stubs[ + "generate_initial_change_stream_partitions" + ] = self.grpc_channel.unary_stream( "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["generate_initial_change_stream_partitions"] + @property def read_change_stream( - self, *args, **kwargs - ) -> Awaitable[bigtable.ReadChangeStreamResponse]: - r"""Function for calling the read change stream method over gRPC. - + self, + ) -> Callable[ + [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] + ]: + r"""Return a callable for the read change stream method over gRPC. NOTE: This API is intended to be used by Apache Beam BigtableIO. Reads changes from a table's change stream. Changes will reflect both user-initiated mutations and mutations that are caused by garbage collection. - Returns: Callable[[~.ReadChangeStreamRequest], Awaitable[~.ReadChangeStreamResponse]]: @@ -653,22 +634,18 @@ def read_change_stream( # the request. # gRPC handles serialization and deserialization, so we just need # to pass in the functions for each. - next_channel = self.next_channel() - stub_key = (next_channel, "read_change_stream") - stub_func = self._stubs.get(stub_key, None) - if stub_func is None: - stub_func = next_channel.unary_stream( + if "read_change_stream" not in self._stubs: + self._stubs["read_change_stream"] = self.grpc_channel.unary_stream( "/google.bigtable.v2.Bigtable/ReadChangeStream", request_serializer=bigtable.ReadChangeStreamRequest.serialize, response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, ) - self._stubs[stub_key] = stub_func - # call stub - return stub_func(*args, **kwargs) + return self._stubs["read_change_stream"] + + def close(self): - close_fns = [channel.close() for channel in self.channel_pool] - return asyncio.gather(*close_fns) + return self.grpc_channel.close() __all__ = ("PooledBigtableGrpcAsyncIOTransport",) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 0337d2f08..99ec770c9 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -753,8 +753,8 @@ def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.read_rows(request) @@ -1000,8 +1000,8 @@ def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio") # and we are mocking out the actual API, so just send an empty request. 
request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.sample_row_keys(request) @@ -1246,8 +1246,8 @@ def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.mutate_row(request) @@ -1537,8 +1537,8 @@ def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.mutate_rows(request) @@ -1798,8 +1798,8 @@ def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyn # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.check_and_mutate_row(request) @@ -2203,8 +2203,8 @@ def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.ping_and_warm(request) @@ -2451,8 +2451,8 @@ def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asy # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.read_modify_write_row(request) @@ -2740,8 +2740,8 @@ def test_generate_initial_change_stream_partitions_pooled_rotation( # and we are mocking out the actual API, so just send an empty request. 
request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.generate_initial_change_stream_partitions(request) @@ -3027,8 +3027,8 @@ def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asynci # and we are mocking out the actual API, so just send an empty request. request = {} - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - channel = client.transport.channel_pool[client.transport._next_idx] + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + channel = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel response = client.read_change_stream(request) @@ -6700,8 +6700,8 @@ async def test_pooled_transport_close_async(): credentials=ga_credentials.AnonymousCredentials(), transport="pooled_grpc_asyncio", ) - num_channels = len(client.transport.channel_pool) - with mock.patch.object(type(client.transport.channel_pool[0]), "close") as close: + num_channels = len(client.transport._grpc_channel._pool) + with mock.patch.object(type(client.transport._grpc_channel._pool[0]), "close") as close: async with client: close.assert_not_called() close.assert_called() @@ -6781,24 +6781,24 @@ async def test_pooled_transport_replace_default(): credentials=ga_credentials.AnonymousCredentials(), transport="pooled_grpc_asyncio", ) - num_channels = len(client.transport.channel_pool) + num_channels = len(client.transport._grpc_channel._pool) for replace_idx in range(num_channels): - prev_pool = [channel for channel in client.transport.channel_pool] + prev_pool = [channel for channel in client.transport._grpc_channel._pool] grace_period = 4 with mock.patch.object( - type(client.transport.channel_pool[0]), "close" + type(client.transport._grpc_channel._pool[0]), "close" ) as close: await client.transport.replace_channel(replace_idx, grace=grace_period) close.assert_called_once() close.assert_awaited() close.assert_called_with(grace=grace_period) - assert isinstance(client.transport.channel_pool[replace_idx], grpc.aio.Channel) + assert isinstance(client.transport._grpc_channel._pool[replace_idx], grpc.aio.Channel) # only the specified channel should be replaced for i in range(num_channels): if i == replace_idx: - assert client.transport.channel_pool[i] != prev_pool[i] + assert client.transport._grpc_channel._pool[i] != prev_pool[i] else: - assert client.transport.channel_pool[i] == prev_pool[i] + assert client.transport._grpc_channel._pool[i] == prev_pool[i] with pytest.raises(ValueError): await client.transport.replace_channel(num_channels + 1) with pytest.raises(ValueError): @@ -6811,12 +6811,12 @@ async def test_pooled_transport_replace_explicit(): credentials=ga_credentials.AnonymousCredentials(), transport="pooled_grpc_asyncio", ) - num_channels = len(client.transport.channel_pool) + num_channels = len(client.transport._grpc_channel._pool) for replace_idx in range(num_channels): - prev_pool = [channel for channel in client.transport.channel_pool] + prev_pool = [channel for channel in client.transport._grpc_channel._pool] grace_period = 0 with mock.patch.object( - type(client.transport.channel_pool[0]), "close" + 
type(client.transport._grpc_channel._pool[0]), "close" ) as close: new_channel = grpc.aio.insecure_channel("localhost:8080") await client.transport.replace_channel( @@ -6825,13 +6825,13 @@ async def test_pooled_transport_replace_explicit(): close.assert_called_once() close.assert_awaited() close.assert_called_with(grace=grace_period) - assert client.transport.channel_pool[replace_idx] == new_channel + assert client.transport._grpc_channel._pool[replace_idx] == new_channel # only the specified channel should be replaced for i in range(num_channels): if i == replace_idx: - assert client.transport.channel_pool[i] != prev_pool[i] + assert client.transport._grpc_channel._pool[i] != prev_pool[i] else: - assert client.transport.channel_pool[i] == prev_pool[i] + assert client.transport._grpc_channel._pool[i] == prev_pool[i] def test_pooled_transport_next_channel(): @@ -6840,7 +6840,7 @@ def test_pooled_transport_next_channel(): credentials=ga_credentials.AnonymousCredentials(), pool_size=num_channels, ) - assert len(transport.channel_pool) == num_channels + assert len(transport._grpc_channel._pool) == num_channels transport._next_idx = 0 # rotate through all channels multiple times num_cycles = 4 @@ -6848,12 +6848,12 @@ def test_pooled_transport_next_channel(): for i in range(num_channels - 1): assert transport._next_idx == i got_channel = transport.next_channel() - assert got_channel == transport.channel_pool[i] + assert got_channel == transport._grpc_channel._pool[i] assert transport._next_idx == (i + 1) # test wrap around assert transport._next_idx == num_channels - 1 got_channel = transport.next_channel() - assert got_channel == transport.channel_pool[num_channels - 1] + assert got_channel == transport._grpc_channel._pool[num_channels - 1] assert transport._next_idx == 0 @@ -6864,7 +6864,7 @@ def test_pooled_transport_pool_unique_channels(): credentials=ga_credentials.AnonymousCredentials(), pool_size=num_channels, ) - channel_list = [channel for channel in transport.channel_pool] + channel_list = [channel for channel in transport._grpc_channel._pool] channel_set = set(channel_list) assert len(channel_list) == num_channels assert len(channel_set) == num_channels From 5155800b842933644b2046efd23a9495a11f2a05 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 31 Mar 2023 17:54:48 -0700 Subject: [PATCH 149/349] got some tests working --- google/cloud/bigtable/client.py | 6 +- .../transports/pooled_grpc_asyncio.py | 3 +- tests/unit/gapic/bigtable_v2/test_bigtable.py | 75 +++++++++---------- 3 files changed, 39 insertions(+), 45 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 0ab43188e..d4c162b2f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -135,7 +135,7 @@ def start_background_channel_refresh(self) -> None: if not self._channel_refresh_tasks: # raise RuntimeError if there is no event loop asyncio.get_running_loop() - for channel_idx in range(len(self.transport.channel_pool)): + for channel_idx in range(len(self.transport._grpc_channel._pool)): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) if sys.version_info >= (3, 8): refresh_task.set_name( @@ -211,7 +211,7 @@ async def _manage_channel( next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately - channel = self.transport.channel_pool[channel_idx] + channel = self.transport._grpc_channel._pool[channel_idx] await self._ping_and_warm_instances(channel) # continuously refresh the 
channel every `refresh_interval` seconds while True: @@ -249,7 +249,7 @@ async def register_instance(self, instance_id: str): if self._channel_refresh_tasks: # refresh tasks already running # call ping and warm on all existing channels - for channel in self.transport.channel_pool: + for channel in self.transport._grpc_channel._pool: await self._ping_and_warm_instances(channel) else: # refresh tasks aren't active. start them as background tasks diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index fa29190cc..87b568c4c 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -101,7 +101,7 @@ def stream_stream(self, *args, **kwargs) -> grpc.aio.StreamStreamMultiCallable: async def close(self, grace=None): close_fns = [channel.close(grace=grace) for channel in self._pool] - return asyncio.gather(*close_fns) + return await asyncio.gather(*close_fns) async def channel_ready(self): ready_fns = [channel.channel_ready() for channel in self._pool] @@ -293,7 +293,6 @@ def __init__( raise ValueError(f"invalid pool_size: {pool_size}") self._ssl_channel_credentials = ssl_channel_credentials self._stubs: Dict[Tuple[aio.Channel, str], Callable] = {} - self._next_idx = 0 if api_mtls_endpoint: warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 99ec770c9..23250124c 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -754,7 +754,7 @@ def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.read_rows(request) @@ -765,7 +765,7 @@ def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): stub_key = (channel, "read_rows") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.read_rows(request) assert next_channel.call_count == i @@ -1001,7 +1001,7 @@ def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio") request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.sample_row_keys(request) @@ -1012,7 +1012,7 @@ def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio") stub_key = (channel, "sample_row_keys") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.sample_row_keys(request) assert next_channel.call_count == i @@ -1247,7 
+1247,7 @@ def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.mutate_row(request) @@ -1258,7 +1258,7 @@ def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): stub_key = (channel, "mutate_row") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.mutate_row(request) assert next_channel.call_count == i @@ -1538,7 +1538,7 @@ def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.mutate_rows(request) @@ -1549,7 +1549,7 @@ def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): stub_key = (channel, "mutate_rows") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.mutate_rows(request) assert next_channel.call_count == i @@ -1799,7 +1799,7 @@ def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyn request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.check_and_mutate_row(request) @@ -1810,7 +1810,7 @@ def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyn stub_key = (channel, "check_and_mutate_row") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.check_and_mutate_row(request) assert next_channel.call_count == i @@ -2204,7 +2204,7 @@ def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.ping_and_warm(request) @@ -2215,7 +2215,7 @@ def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): stub_key = (channel, "ping_and_warm") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = 
client.ping_and_warm(request) assert next_channel.call_count == i @@ -2452,7 +2452,7 @@ def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asy request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.read_modify_write_row(request) @@ -2463,7 +2463,7 @@ def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asy stub_key = (channel, "read_modify_write_row") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.read_modify_write_row(request) assert next_channel.call_count == i @@ -2741,7 +2741,7 @@ def test_generate_initial_change_stream_partitions_pooled_rotation( request = {} with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel response = client.generate_initial_change_stream_partitions(request) @@ -2752,7 +2752,7 @@ def test_generate_initial_change_stream_partitions_pooled_rotation( stub_key = (channel, "generate_initial_change_stream_partitions") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.generate_initial_change_stream_partitions(request) assert next_channel.call_count == i @@ -3018,19 +3018,18 @@ def test_read_change_stream(request_type, transport: str = "grpc"): def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object(transports.pooled_grpc_asyncio.PooledChannel, "next_channel") as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._next_idx] + channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] next_channel.return_value = channel - response = client.read_change_stream(request) # Establish that next_channel was called @@ -3039,7 +3038,7 @@ def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asynci stub_key = (channel, "read_change_stream") assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel - starting_idx = client.transport._next_idx + starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): response = client.read_change_stream(request) assert next_channel.call_count == i @@ -6841,20 +6840,20 @@ def test_pooled_transport_next_channel(): pool_size=num_channels, ) assert len(transport._grpc_channel._pool) == num_channels - transport._next_idx = 0 + transport._grpc_channel._next_idx = 0 # rotate through all channels multiple times num_cycles = 4 for _ in range(num_cycles): for i in range(num_channels - 1): - assert transport._next_idx == i - got_channel = transport.next_channel() + assert transport._grpc_channel._next_idx == i + got_channel = transport._grpc_channel.next_channel() assert got_channel == transport._grpc_channel._pool[i] - assert transport._next_idx == (i + 1) + assert transport._grpc_channel._next_idx == (i + 1) # test wrap around - assert transport._next_idx == num_channels - 1 - got_channel = transport.next_channel() + assert transport._grpc_channel._next_idx == num_channels - 1 + got_channel = transport._grpc_channel.next_channel() assert got_channel == transport._grpc_channel._pool[num_channels - 1] - assert transport._next_idx == 0 + assert transport._grpc_channel._next_idx == 0 def test_pooled_transport_pool_unique_channels(): @@ -6879,10 +6878,7 @@ def test_pooled_transport_pool_creation(): scopes = ["test1", "test2"] quota_project_id = "test3" host = "testhost:8080" - - with mock.patch.object( - transports.PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: + with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: transport = transports.PooledBigtableGrpcAsyncIOTransport( credentials=creds, pool_size=num_channels, @@ -6892,9 +6888,8 @@ def test_pooled_transport_pool_creation(): ) assert create_channel.call_count == num_channels for i in range(num_channels): - args = create_channel.call_args_list[i][0] - assert args[0] == host kwargs = create_channel.call_args_list[i][1] + assert kwargs["target"] == host assert kwargs["credentials"] == creds assert kwargs["scopes"] == scopes assert kwargs["quota_project_id"] == quota_project_id From 522f7fad22abe24d203cd9cfb1c0be811e3df19e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sun, 2 Apr 2023 10:48:07 -0700 Subject: [PATCH 150/349] improved tests --- tests/unit/test_client.py | 42 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a57d4cad5..de792350e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -58,7 +58,7 @@ async def test_ctor(): ) await asyncio.sleep(0.1) assert client.project == expected_project - assert len(client.transport.channel_pool) == expected_pool_size + assert len(client.transport._grpc_channel._pool) == expected_pool_size assert 
client.metadata == expected_metadata assert not client._active_instances assert len(client._channel_refresh_tasks) == expected_pool_size @@ -154,22 +154,16 @@ async def test_veneer_grpc_headers(): @pytest.mark.asyncio async def test_channel_pool_creation(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - pool_size = 14 - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: + with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: create_channel.return_value = AsyncMock() client = _make_one(project="project-id", pool_size=pool_size) assert create_channel.call_count == pool_size await client.close() # channels should be unique client = _make_one(project="project-id", pool_size=pool_size) - pool_list = list(client.transport.channel_pool) - pool_set = set(client.transport.channel_pool) + pool_list = list(client.transport._grpc_channel._pool) + pool_set = set(client.transport._grpc_channel._pool) assert len(pool_list) == len(pool_set) await client.close() @@ -178,13 +172,13 @@ async def test_channel_pool_creation(): async def test_channel_pool_rotation(): pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) - assert len(client.transport.channel_pool) == pool_size + assert len(client.transport._grpc_channel._pool) == pool_size - with mock.patch.object(type(client.transport), "next_channel") as next_channel: - with mock.patch.object(type(client.transport.channel_pool[0]), "unary_unary"): + with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: + with mock.patch.object(type(client.transport._grpc_channel._pool[0]), "unary_unary"): # calling an rpc `pool_size` times should use a different channel each time for i in range(pool_size): - channel_1 = client.transport.channel_pool[client.transport._next_idx] + channel_1 = client.transport._grpc_channel._pool[client.transport._next_idx] next_channel.return_value = channel_1 client.transport.ping_and_warm() assert next_channel.call_count == i + 1 @@ -197,10 +191,10 @@ async def test_channel_pool_replace(): pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport.channel_pool] + start_pool = [channel for channel in client.transport._grpc_channel._pool] grace_period = 9 with mock.patch.object( - type(client.transport.channel_pool[0]), "close" + type(client.transport._grpc_channel._pool[0]), "close" ) as close: new_channel = grpc.aio.insecure_channel("localhost:8080") await client.transport.replace_channel( @@ -208,12 +202,12 @@ async def test_channel_pool_replace(): ) close.assert_called_once_with(grace=grace_period) close.assert_awaited_once() - assert client.transport.channel_pool[replace_idx] == new_channel + assert client.transport._grpc_channel._pool[replace_idx] == new_channel for i in range(pool_size): if i != replace_idx: - assert client.transport.channel_pool[i] == start_pool[i] + assert client.transport._grpc_channel._pool[i] == start_pool[i] else: - assert client.transport.channel_pool[i] != start_pool[i] + assert client.transport._grpc_channel._pool[i] != start_pool[i] await client.close() @@ -248,7 +242,7 @@ async def test_start_background_channel_refresh(pool_size): assert isinstance(task, asyncio.Task) await asyncio.sleep(0.1) assert ping_and_warm.call_count == pool_size - for channel in 
client.transport.channel_pool: + for channel in client.transport._grpc_channel._pool: ping_and_warm.assert_any_call(channel) await client.close() @@ -273,7 +267,7 @@ async def test__ping_and_warm_instances(): # test with no instances with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: client = _make_one(project="project-id", pool_size=1) - channel = client.transport.channel_pool[0] + channel = client.transport._grpc_channel._pool[0] await client._ping_and_warm_instances(channel) gather.assert_called_once() gather.assert_awaited_once() @@ -342,9 +336,7 @@ async def test__manage_channel_ping_and_warm(): client = _make_one(project="project-id") new_channel = grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep"): - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "create_channel" - ) as create_channel: + with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: create_channel.return_value = new_channel with mock.patch.object( PooledBigtableGrpcAsyncIOTransport, "replace_channel" @@ -356,7 +348,7 @@ async def test__manage_channel_ping_and_warm(): ) as ping_and_warm: try: channel_idx = 2 - old_channel = client.transport.channel_pool[channel_idx] + old_channel = client.transport._grpc_channel._pool[channel_idx] await client._manage_channel(channel_idx, 10) except asyncio.CancelledError: pass From 9429244515b7cb5900c9affba67730e8e2db6772 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sun, 2 Apr 2023 10:56:05 -0700 Subject: [PATCH 151/349] renamed RowResponse and CellResponse to Row and Cell --- google/cloud/bigtable/__init__.py | 8 +-- google/cloud/bigtable/client.py | 14 ++--- google/cloud/bigtable/mutations.py | 2 +- google/cloud/bigtable/mutations_batcher.py | 2 +- .../cloud/bigtable/read_modify_write_rules.py | 2 +- .../bigtable/{row_response.py => row.py} | 53 +++++++++---------- .../{test_row_response.py => test_row.py} | 48 ++++++++--------- 7 files changed, 62 insertions(+), 67 deletions(-) rename google/cloud/bigtable/{row_response.py => row.py} (89%) rename tests/unit/{test_row_response.py => test_row.py} (94%) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index daa562c0c..9819bc0fa 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -22,8 +22,8 @@ from google.cloud.bigtable.client import Table from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable.row_response import RowResponse -from google.cloud.bigtable.row_response import CellResponse +from google.cloud.bigtable.row import Row +from google.cloud.bigtable.row import Cell from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.mutations import Mutation @@ -50,6 +50,6 @@ "DeleteRangeFromColumn", "DeleteAllFromFamily", "DeleteAllFromRow", - "RowResponse", - "CellResponse", + "Row", + "Cell", ) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index df4bf308f..23f0cb6fe 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher - from google.cloud.bigtable.row_response import RowResponse + from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from 
google.cloud.bigtable.row_filters import RowFilter @@ -109,7 +109,7 @@ async def read_rows_stream( idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> AsyncIterable[RowResponse]: + ) -> AsyncIterable[Row]: """ Returns a generator to asynchronously stream back row data. @@ -166,7 +166,7 @@ async def read_rows( per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> list[RowResponse]: + ) -> list[Row]: """ Helper function that returns a full list instead of a generator @@ -184,7 +184,7 @@ async def read_row( operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> RowResponse: + ) -> Row: """ Helper function to return a single row @@ -206,7 +206,7 @@ async def read_rows_sharded( idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> AsyncIterable[RowResponse]: + ) -> AsyncIterable[Row]: """ Runs a sharded query in parallel @@ -410,7 +410,7 @@ async def read_modify_write_row( *, operation_timeout: int | float | None = 60, metadata: list[tuple[str, str]] | None = None, - ) -> RowResponse: + ) -> Row: """ Reads and modifies a row atomically according to input ReadModifyWriteRules, and returns the contents of all modified cells @@ -429,7 +429,7 @@ async def read_modify_write_row( Failed requests will not be retried. - metadata: Strings which should be sent along with the request as metadata headers. Returns: - - RowResponse: containing cell data that was modified as part of the + - Row: containing cell data that was modified as part of the operation Raises: - GoogleAPIError exceptions from grpc call diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index ed3c2f065..4ff59bff9 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -15,7 +15,7 @@ from __future__ import annotations from dataclasses import dataclass -from google.cloud.bigtable.row_response import family_id, qualifier, row_key +from google.cloud.bigtable.row import family_id, qualifier, row_key class Mutation: diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 2e393cc7e..582786ee4 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -18,7 +18,7 @@ from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import Mutation -from google.cloud.bigtable.row_response import row_key +from google.cloud.bigtable.row import row_key from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py index a9b0885f2..839262ea2 100644 --- a/google/cloud/bigtable/read_modify_write_rules.py +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -16,7 +16,7 @@ from dataclasses import dataclass -from google.cloud.bigtable.row_response import family_id, qualifier +from google.cloud.bigtable.row import family_id, qualifier class ReadModifyWriteRule: diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row.py similarity index 89% rename from google/cloud/bigtable/row_response.py rename to google/cloud/bigtable/row.py index 8e60d1743..4a5bb6a73 100644 --- 
a/google/cloud/bigtable/row_response.py +++ b/google/cloud/bigtable/row.py @@ -25,7 +25,7 @@ row_value = bytes -class RowResponse(Sequence["CellResponse"]): +class Row(Sequence["Cell"]): """ Model class for row data returned from server @@ -40,26 +40,23 @@ class RowResponse(Sequence["CellResponse"]): def __init__( self, key: row_key, - cells: list[CellResponse] - | dict[tuple[family_id, qualifier], list[dict[str, Any]]], + cells: list[Cell] | dict[tuple[family_id, qualifier], list[dict[str, Any]]], ): """ - Initializes a RowResponse object + Initializes a Row object - RowResponse objects are not intended to be created by users. + Row objects are not intended to be created by users. They are returned by the Bigtable backend. """ self.row_key = key - self._cells_map: dict[ - family_id, dict[qualifier, list[CellResponse]] - ] = OrderedDict() - self._cells_list: list[CellResponse] = [] + self._cells_map: dict[family_id, dict[qualifier, list[Cell]]] = OrderedDict() + self._cells_list: list[Cell] = [] if isinstance(cells, dict): # handle dict input tmp_list = [] for (family, qualifier), cell_list in cells.items(): for cell_dict in cell_list: - cell_obj = CellResponse( + cell_obj = Cell( row=key, family=family, column_qualifier=qualifier, **cell_dict ) tmp_list.append(cell_obj) @@ -68,7 +65,7 @@ def __init__( for cell in sorted(cells): if cell.row_key != self.row_key: raise ValueError( - f"CellResponse row_key ({cell.row_key!r}) does not match RowResponse key ({self.row_key!r})" + f"Cell row_key ({cell.row_key!r}) does not match Row key ({self.row_key!r})" ) if cell.family not in self._cells_map: self._cells_map[cell.family] = OrderedDict() @@ -79,7 +76,7 @@ def __init__( def get_cells( self, family: str | None = None, qualifier: str | bytes | None = None - ) -> list[CellResponse]: + ) -> list[Cell]: """ Returns cells sorted in Bigtable native order: - Family lexicographically ascending @@ -113,9 +110,7 @@ def get_cells( ) return self._cells_map[family][qualifier] - def _get_all_from_family( - self, family: family_id - ) -> Generator[CellResponse, None, None]: + def _get_all_from_family(self, family: family_id) -> Generator[Cell, None, None]: """ Returns all cells in the row for the family_id """ @@ -157,7 +152,7 @@ def __repr__(self): cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") cell_str_buffer.append("}") cell_str = "\n".join(cell_str_buffer) - output = f"RowResponse(key={self.row_key!r}, cells={cell_str})" + output = f"Row(key={self.row_key!r}, cells={cell_str})" return output def to_dict(self) -> dict[str, Any]: @@ -195,34 +190,34 @@ def __contains__(self, item): `(family, qualifier)` pairs associated with the cells """ if isinstance(item, family_id): - # check if family key is in RowResponse + # check if family key is in Row return item in self._cells_map elif ( isinstance(item, tuple) and isinstance(item[0], family_id) and isinstance(item[1], (qualifier, str)) ): - # check if (family, qualifier) pair is in RowResponse + # check if (family, qualifier) pair is in Row qualifer = item[1] if isinstance(item[1], bytes) else item[1].encode() return item[0] in self._cells_map and qualifer in self._cells_map[item[0]] - # check if CellResponse is in RowResponse + # check if Cell is in Row return item in self._cells_list @overload def __getitem__( self, index: family_id | tuple[family_id, qualifier | str], - ) -> list[CellResponse]: + ) -> list[Cell]: # overload signature for type checking pass @overload - def __getitem__(self, index: int) -> CellResponse: + def 
__getitem__(self, index: int) -> Cell: # overload signature for type checking pass @overload - def __getitem__(self, index: slice) -> list[CellResponse]: + def __getitem__(self, index: slice) -> list[Cell]: # overload signature for type checking pass @@ -273,7 +268,7 @@ def __eq__(self, other): """ # for performance reasons, check row metadata # before checking individual cells - if not isinstance(other, RowResponse): + if not isinstance(other, Row): return False if self.row_key != other.row_key: return False @@ -301,7 +296,7 @@ def __ne__(self, other) -> bool: @total_ordering -class CellResponse: +class Cell: """ Model class for cell data @@ -320,9 +315,9 @@ def __init__( labels: list[str] | None = None, ): """ - CellResponse constructor + Cell constructor - CellResponse objects are not intended to be constructed by users. + Cell objects are not intended to be constructed by users. They are returned by the Bigtable backend. """ self.value = value @@ -368,7 +363,7 @@ def __repr__(self): """ Returns a string representation of the cell """ - return f"CellResponse(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" + return f"Cell(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" """For Bigtable native ordering""" @@ -376,7 +371,7 @@ def __lt__(self, other) -> bool: """ Implements `<` operator """ - if not isinstance(other, CellResponse): + if not isinstance(other, Cell): return NotImplemented this_ordering = ( self.family, @@ -398,7 +393,7 @@ def __eq__(self, other) -> bool: """ Implements `==` operator """ - if not isinstance(other, CellResponse): + if not isinstance(other, Cell): return NotImplemented return ( self.row_key == other.row_key diff --git a/tests/unit/test_row_response.py b/tests/unit/test_row.py similarity index 94% rename from tests/unit/test_row_response.py rename to tests/unit/test_row.py index 4eebfaa8a..0f5e67a2f 100644 --- a/tests/unit/test_row_response.py +++ b/tests/unit/test_row.py @@ -24,12 +24,12 @@ TEST_LABELS = ["label1", "label2"] -class TestRowResponse(unittest.TestCase): +class TestRow(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_response import RowResponse + from google.cloud.bigtable.row_response import Row - return RowResponse + return Row def _make_one(self, *args, **kwargs): if len(args) == 0: @@ -45,9 +45,9 @@ def _make_cell( timestamp=TEST_TIMESTAMP, labels=TEST_LABELS, ): - from google.cloud.bigtable.row_response import CellResponse + from google.cloud.bigtable.row_response import Cell - return CellResponse(value, row_key, family_id, qualifier, timestamp, labels) + return Cell(value, row_key, family_id, qualifier, timestamp, labels) def test_ctor(self): cells = [self._make_cell(), self._make_cell()] @@ -123,27 +123,27 @@ def test_get_cells(self): row_response.get_cells(family="1", qualifier=b"c") def test__repr__(self): - from google.cloud.bigtable.row_response import CellResponse - from google.cloud.bigtable.row_response import RowResponse + from google.cloud.bigtable.row_response import Cell + from google.cloud.bigtable.row_response import Row cell_str = ( "{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}" % (TEST_TIMESTAMP) ) - expected_prefix = "RowResponse(key=b'row', cells=" + expected_prefix = "Row(key=b'row', cells=" row = 
self._make_one(TEST_ROW_KEY, [self._make_cell()]) self.assertIn(expected_prefix, repr(row)) self.assertIn(cell_str, repr(row)) expected_full = ( - "RowResponse(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}],\n})" + "Row(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}],\n})" % (TEST_TIMESTAMP) ) self.assertEqual(expected_full, repr(row)) # should be able to construct instance from __repr__ result = eval(repr(row)) self.assertEqual(result, row) - self.assertIsInstance(result, RowResponse) - self.assertIsInstance(result[0], CellResponse) + self.assertIsInstance(result, Row) + self.assertIsInstance(result[0], Cell) # try with multiple cells row = self._make_one(TEST_ROW_KEY, [self._make_cell(), self._make_cell()]) self.assertIn(expected_prefix, repr(row)) @@ -151,10 +151,10 @@ def test__repr__(self): # should be able to construct instance from __repr__ result = eval(repr(row)) self.assertEqual(result, row) - self.assertIsInstance(result, RowResponse) + self.assertIsInstance(result, Row) self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], CellResponse) - self.assertIsInstance(result[1], CellResponse) + self.assertIsInstance(result[0], Cell) + self.assertIsInstance(result[1], Cell) def test___str__(self): cells = { @@ -230,9 +230,9 @@ def test_to_dict(self): def test_iteration(self): from types import GeneratorType - from google.cloud.bigtable.row_response import CellResponse + from google.cloud.bigtable.row_response import Cell - # should be able to iterate over the RowResponse as a list + # should be able to iterate over the Row as a list cell3 = self._make_cell(value=b"3") cell1 = self._make_cell(value=b"1") cell2 = self._make_cell(value=b"2") @@ -245,7 +245,7 @@ def test_iteration(self): # should be able to iterate over all cells idx = 0 for cell in row_response: - self.assertIsInstance(cell, CellResponse) + self.assertIsInstance(cell, Cell) self.assertEqual(cell.value, result_list[idx].value) self.assertEqual(cell.value, str(idx + 1).encode()) idx += 1 @@ -505,12 +505,12 @@ def test_index_of(self): row_response.index(None) -class TestCellResponse(unittest.TestCase): +class TestCell(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_response import CellResponse + from google.cloud.bigtable.row_response import Cell - return CellResponse + return Cell def _make_one(self, *args, **kwargs): if len(args) == 0: @@ -632,11 +632,11 @@ def test___str__(self): self.assertEqual(str(cell), str(test_value)) def test___repr__(self): - from google.cloud.bigtable.row_response import CellResponse # type: ignore # noqa: F401 + from google.cloud.bigtable.row_response import Cell # type: ignore # noqa: F401 cell = self._make_one() expected = ( - "CellResponse(value=b'1234', row=b'row', " + "Cell(value=b'1234', row=b'row', " + "family='cf1', column_qualifier=b'col', " + f"timestamp_micros={TEST_TIMESTAMP}, labels=['label1', 'label2'])" ) @@ -646,7 +646,7 @@ def test___repr__(self): self.assertEqual(result, cell) def test___repr___no_labels(self): - from google.cloud.bigtable.row_response import CellResponse # type: ignore # noqa: F401 + from google.cloud.bigtable.row_response import Cell # type: ignore # noqa: F401 cell_no_labels = self._make_one( TEST_VALUE, @@ -657,7 +657,7 @@ def test___repr___no_labels(self): None, ) expected = ( - "CellResponse(value=b'1234', row=b'row', " + "Cell(value=b'1234', row=b'row', " + 
"family='cf1', column_qualifier=b'col', " + f"timestamp_micros={TEST_TIMESTAMP}, labels=[])" ) From 1aa74246bd249e7c94c7c5fed4aaa74f1e4fa067 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sun, 2 Apr 2023 11:53:48 -0700 Subject: [PATCH 152/349] fixed tests --- tests/unit/test_row.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_row.py b/tests/unit/test_row.py index 0f5e67a2f..ddcaa62cc 100644 --- a/tests/unit/test_row.py +++ b/tests/unit/test_row.py @@ -27,7 +27,7 @@ class TestRow(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_response import Row + from google.cloud.bigtable.row import Row return Row @@ -45,7 +45,7 @@ def _make_cell( timestamp=TEST_TIMESTAMP, labels=TEST_LABELS, ): - from google.cloud.bigtable.row_response import Cell + from google.cloud.bigtable.row import Cell return Cell(value, row_key, family_id, qualifier, timestamp, labels) @@ -123,8 +123,8 @@ def test_get_cells(self): row_response.get_cells(family="1", qualifier=b"c") def test__repr__(self): - from google.cloud.bigtable.row_response import Cell - from google.cloud.bigtable.row_response import Row + from google.cloud.bigtable.row import Cell + from google.cloud.bigtable.row import Row cell_str = ( "{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}" @@ -230,7 +230,7 @@ def test_to_dict(self): def test_iteration(self): from types import GeneratorType - from google.cloud.bigtable.row_response import Cell + from google.cloud.bigtable.row import Cell # should be able to iterate over the Row as a list cell3 = self._make_cell(value=b"3") @@ -508,7 +508,7 @@ def test_index_of(self): class TestCell(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_response import Cell + from google.cloud.bigtable.row import Cell return Cell @@ -632,7 +632,7 @@ def test___str__(self): self.assertEqual(str(cell), str(test_value)) def test___repr__(self): - from google.cloud.bigtable.row_response import Cell # type: ignore # noqa: F401 + from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 cell = self._make_one() expected = ( @@ -646,7 +646,7 @@ def test___repr__(self): self.assertEqual(result, cell) def test___repr___no_labels(self): - from google.cloud.bigtable.row_response import Cell # type: ignore # noqa: F401 + from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 cell_no_labels = self._make_one( TEST_VALUE, From a6036495c1bf014c56d188abe740a6194c39da46 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sun, 2 Apr 2023 12:07:56 -0700 Subject: [PATCH 153/349] simplified row construction --- google/cloud/bigtable/row.py | 18 ++-------- tests/unit/test_row.py | 67 ++++++------------------------------ 2 files changed, 12 insertions(+), 73 deletions(-) diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 4a5bb6a73..8231e324e 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -40,7 +40,7 @@ class Row(Sequence["Cell"]): def __init__( self, key: row_key, - cells: list[Cell] | dict[tuple[family_id, qualifier], list[dict[str, Any]]], + cells: list[Cell], ): """ Initializes a Row object @@ -51,22 +51,8 @@ def __init__( self.row_key = key self._cells_map: dict[family_id, dict[qualifier, list[Cell]]] = OrderedDict() self._cells_list: list[Cell] = [] - if isinstance(cells, dict): - # handle dict input - tmp_list = [] - for (family, qualifier), cell_list in cells.items(): - for cell_dict in cell_list: 
- cell_obj = Cell( - row=key, family=family, column_qualifier=qualifier, **cell_dict - ) - tmp_list.append(cell_obj) - cells = tmp_list # add cells to internal stores using Bigtable native ordering - for cell in sorted(cells): - if cell.row_key != self.row_key: - raise ValueError( - f"Cell row_key ({cell.row_key!r}) does not match Row key ({self.row_key!r})" - ) + for cell in cells: if cell.family not in self._cells_map: self._cells_map[cell.family] = OrderedDict() if cell.column_qualifier not in self._cells_map[cell.family]: diff --git a/tests/unit/test_row.py b/tests/unit/test_row.py index ddcaa62cc..92f58eb36 100644 --- a/tests/unit/test_row.py +++ b/tests/unit/test_row.py @@ -55,39 +55,6 @@ def test_ctor(self): self.assertEqual(list(row_response), cells) self.assertEqual(row_response.row_key, TEST_ROW_KEY) - def test_ctor_dict(self): - cells = { - (TEST_FAMILY_ID, TEST_QUALIFIER): [ - self._make_cell().to_dict(), - self._make_cell().to_dict(), - ] - } - row_response = self._make_one(TEST_ROW_KEY, cells) - self.assertEqual(row_response.row_key, TEST_ROW_KEY) - self.assertEqual(len(row_response), 2) - for i in range(2): - self.assertEqual(row_response[i].value, TEST_VALUE) - self.assertEqual(row_response[i].row_key, TEST_ROW_KEY) - self.assertEqual(row_response[i].family, TEST_FAMILY_ID) - self.assertEqual(row_response[i].column_qualifier, TEST_QUALIFIER) - self.assertEqual(row_response[i].labels, TEST_LABELS) - self.assertEqual(row_response[0].timestamp_micros, TEST_TIMESTAMP) - self.assertEqual(row_response[1].timestamp_micros, TEST_TIMESTAMP) - - def test_ctor_bad_cell(self): - cells = [self._make_cell(), self._make_cell()] - cells[1].row_key = b"other" - with self.assertRaises(ValueError): - self._make_one(TEST_ROW_KEY, cells) - - def test_cell_order(self): - # cells should be ordered on init - cell1 = self._make_cell(value=b"1") - cell2 = self._make_cell(value=b"2") - resp = self._make_one(TEST_ROW_KEY, [cell2, cell1]) - output = list(resp) - self.assertEqual(output, [cell1, cell2]) - def test_get_cells(self): cell_list = [] for family_id in ["1", "2"]: @@ -122,7 +89,7 @@ def test_get_cells(self): with self.assertRaises(ValueError): row_response.get_cells(family="1", qualifier=b"c") - def test__repr__(self): + def test___repr__(self): from google.cloud.bigtable.row import Cell from google.cloud.bigtable.row import Row @@ -139,38 +106,24 @@ def test__repr__(self): % (TEST_TIMESTAMP) ) self.assertEqual(expected_full, repr(row)) - # should be able to construct instance from __repr__ - result = eval(repr(row)) - self.assertEqual(result, row) - self.assertIsInstance(result, Row) - self.assertIsInstance(result[0], Cell) # try with multiple cells row = self._make_one(TEST_ROW_KEY, [self._make_cell(), self._make_cell()]) self.assertIn(expected_prefix, repr(row)) self.assertIn(cell_str, repr(row)) - # should be able to construct instance from __repr__ - result = eval(repr(row)) - self.assertEqual(result, row) - self.assertIsInstance(result, Row) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], Cell) - self.assertIsInstance(result[1], Cell) def test___str__(self): - cells = { - ("3", TEST_QUALIFIER): [ - self._make_cell().to_dict(), - self._make_cell().to_dict(), - self._make_cell().to_dict(), - ] - } - cells[("1", TEST_QUALIFIER)] = [self._make_cell().to_dict()] + cells = [ + self._make_cell(value=b"1234", family_id="1", qualifier=b"col"), + self._make_cell(value=b"5678", family_id="3", qualifier=b"col"), + self._make_cell(value=b"1", family_id="3", qualifier=b"col"), + 
self._make_cell(value=b"2", family_id="3", qualifier=b"col"), + ] row_response = self._make_one(TEST_ROW_KEY, cells) expected = ( "{\n" + " (family='1', qualifier=b'col'): [b'1234'],\n" - + " (family='3', qualifier=b'col'): [b'1234', (+2 more)],\n" + + " (family='3', qualifier=b'col'): [b'5678', (+2 more)],\n" + "}" ) self.assertEqual(expected, str(row_response)) @@ -233,10 +186,10 @@ def test_iteration(self): from google.cloud.bigtable.row import Cell # should be able to iterate over the Row as a list - cell3 = self._make_cell(value=b"3") cell1 = self._make_cell(value=b"1") cell2 = self._make_cell(value=b"2") - row_response = self._make_one(TEST_ROW_KEY, [cell3, cell1, cell2]) + cell3 = self._make_cell(value=b"3") + row_response = self._make_one(TEST_ROW_KEY, [cell1, cell2, cell3]) self.assertEqual(len(row_response), 3) # should create generator object self.assertIsInstance(iter(row_response), GeneratorType) From 68a5a0f9167ea47e6f9957cb8951d0035673c963 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 08:58:42 -0700 Subject: [PATCH 154/349] added RowRange object --- google/cloud/bigtable/read_rows_query.py | 75 +++++++++++++++--------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 24f85f622..0cec29718 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -28,6 +28,46 @@ class _RangePoint: key: row_key is_inclusive: bool +@dataclass +class RowRange + start: _RangePoint | None + end: _RangePoint | None + + def __init__(self, + start_key: str | bytes | None = None, + end_key: str | bytes | None = None, + start_is_inclusive: bool | None = None, + end_is_inclusive: bool | None = None, + ): + # check for invalid combinations of arguments + if start_is_inclusive is None: + start_is_inclusive = True + elif start_key is None: + raise ValueError("start_is_inclusive must be set with start_key") + if end_is_inclusive is None: + end_is_inclusive = False + elif end_key is None: + raise ValueError("end_is_inclusive must be set with end_key") + # ensure that start_key and end_key are bytes + if isinstance(start_key, str): + start_key = start_key.encode() + elif start_key is not None and not isinstance(start_key, bytes): + raise ValueError("start_key must be a string or bytes") + if isinstance(end_key, str): + end_key = end_key.encode() + elif end_key is not None and not isinstance(end_key, bytes): + raise ValueError("end_key must be a string or bytes") + + self.start = ( + _RangePoint(start_key, start_is_inclusive) + if start_key is not None + else None + ) + self.end = ( + _RangePoint(end_key, end_is_inclusive) + if end_key is not None + else None + ) class ReadRowsQuery: """ @@ -37,6 +77,7 @@ class ReadRowsQuery: def __init__( self, row_keys: list[str | bytes] | str | bytes | None = None, + row_ranges: list[RowRange] | RowRange | None = None, limit: int | None = None, row_filter: RowFilter | dict[str, Any] | None = None, ): @@ -50,7 +91,9 @@ def __init__( - row_filter: a RowFilter to apply to the query """ self.row_keys: set[bytes] = set() - self.row_ranges: list[tuple[_RangePoint | None, _RangePoint | None]] = [] + self.row_ranges: list[RowRange] = [] + for range in row_ranges: + self.row_ranges.append(range) if row_keys: self.add_rows(row_keys) self.limit: int | None = limit @@ -138,32 +181,10 @@ def add_range( - end_is_inclusive: if True, the end key is included in the range defaults to False if None. 
Must not be included if end_key is None """ - # check for invalid combinations of arguments - if start_is_inclusive is None: - start_is_inclusive = True - elif start_key is None: - raise ValueError("start_is_inclusive must be set with start_key") - if end_is_inclusive is None: - end_is_inclusive = False - elif end_key is None: - raise ValueError("end_is_inclusive must be set with end_key") - # ensure that start_key and end_key are bytes - if isinstance(start_key, str): - start_key = start_key.encode() - elif start_key is not None and not isinstance(start_key, bytes): - raise ValueError("start_key must be a string or bytes") - if isinstance(end_key, str): - end_key = end_key.encode() - elif end_key is not None and not isinstance(end_key, bytes): - raise ValueError("end_key must be a string or bytes") - - start_pt = ( - _RangePoint(start_key, start_is_inclusive) - if start_key is not None - else None + new_range = RowRange( + start_key, end_key, start_is_inclusive, end_is_inclusive ) - end_pt = _RangePoint(end_key, end_is_inclusive) if end_key is not None else None - self.row_ranges.append((start_pt, end_pt)) + self.row_ranges.append(new_range) return self def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: @@ -226,7 +247,7 @@ def limit(self, new_limit: int | None): @property def filter(self): """ - Getter implemntation for filter property + Getter implementation for filter property """ return self._filter From cc2e7c8522629db8992a53f96668ad4267839746 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 09:10:24 -0700 Subject: [PATCH 155/349] added comments --- google/cloud/bigtable/row_merger.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b8c01cb81..36519f9a0 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -54,6 +54,14 @@ async def merge_row_stream( ) -> AsyncGenerator[RowResponse, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows + + Args: + - request_generator: AsyncIterable of ReadRowsResponse objects. Typically + this is a stream of chunks from the Bigtable API + Returns: + - AsyncGenerator of Rows + Raises: + - InvalidChunk: if the chunk stream is invalid """ async for row_response in request_generator: # unwrap protoplus object for increased performance @@ -74,6 +82,9 @@ async def merge_row_stream( async def _generator_to_cache( self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] ) -> None: + """ + Helper function to push items from an async generator into a cache + """ async for item in input_generator: await cache.put(item) @@ -82,6 +93,20 @@ async def merge_row_stream_with_cache( request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, ) -> AsyncGenerator[RowResponse, None]: + """ + Consume chunks from a ReadRowsResponse stream into a set of Rows, + with a local cache to decouple the producer from the consumer + + Args: + - request_generator: AsyncIterable of ReadRowsResponse objects. Typically + this is a stream of chunks from the Bigtable API + - max_cache_size: maximum number of items to cache. 
If None, cache size + is unbounded + Returns: + - AsyncGenerator of Rows + Raises: + - InvalidChunk: if the chunk stream is invalid + """ if max_cache_size is None: max_cache_size = -1 cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_cache_size) From ba629c84b8e8fbd39bbd4f70c883f3ba108ea2db Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 09:20:19 -0700 Subject: [PATCH 156/349] added api-core submodule --- .gitmodules | 3 +++ python-api-core | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 python-api-core diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..3cfed03b8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "python-api-core"] + path = python-api-core + url = git@github.com:googleapis/python-api-core.git diff --git a/python-api-core b/python-api-core new file mode 160000 index 000000000..985b13a5e --- /dev/null +++ b/python-api-core @@ -0,0 +1 @@ +Subproject commit 985b13a5e633958204d4fa60b0c0d840fc0351f8 From 75d2c10f86ab01d42a5b076979ed5b24d2b581b1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 09:44:03 -0700 Subject: [PATCH 157/349] copied in rough retryable logic --- google/cloud/bigtable/client.py | 64 ++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d96ba6f7d..395e30ee0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -22,6 +22,7 @@ from google.cloud.bigtable.row_merger import RowMerger import google.auth.credentials +from google.api_core import retry_async as retries if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -157,6 +158,40 @@ async def read_rows_stream( """ request = query.to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self._gapic_client.table_name(self.table_id) + + def on_error(exc): + print(f"RETRYING: {exc}") + return exc + retry = retries.AsyncRetry( + predicate=retries.if_exception_type( + RuntimeError, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable + ), + timeout=timeout, + on_error=on_error, + initial=0.1, + multiplier=2, + maximum=1, + is_generator=True + ) + retryable_fn = retry(self._read_rows_retryable) + emitted_rows:Set[bytes] = set({}) + async for result in retryable_fn(requestm emmited_rows, operation_timeout): + if isinstance(result, Row): + yield result + elif isinstance(result, Exception): + print(f"Exception: {result}") + + + async def _read_rows_retryable( + self, request:dict[str, Any], emitted_rows: set[bytes], operation_timeout=60.0, revise_on_retry=True + ) -> AsyncGenerator[Row, None]: + if revise_request_on_retry and len(emitted_rows) > 0: + # if this is a retry, try to trim down the request to avoid ones we've already processed + request["rows"] = self._revise_rowset( + request.get("rows", None), emitted_rows + ) gapic_stream_handler = await self._gapic_client.read_rows( request=request, app_profile_id=self.app_profile_id, @@ -164,7 +199,34 @@ async def read_rows_stream( ) merger = RowMerger() async for row in merger.merge_row_stream(gapic_stream_handler): - yield row + if row.row_key not in emitted_rows: + yield row + emitted_rows.add(row.row_key) + + def _revise_rowset( + self, row_set: dict[str, Any]|None, emitted_rows: set[bytes] + ) -> dict[str, Any]: + # if user is doing a whole table scan, start a new one with the last seen key + if row_set is None: + last_seen = 
max(emitted_rows) + return { + "row_keys": [], + "row_ranges": [{"start_key_open": last_seen}], + } + else: + # remove seen keys from user-specific key list + row_keys: List[bytes] = row_set.get("row_keys", []) + adjusted_keys = [] + for key in row_keys: + if key not in emitted_rows: + adjusted_keys.append(key) + # if user specified only a single range, set start to the last seen key + row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) + if len(row_keys) == 0 and len(row_ranges) == 1: + row_ranges[0]["start_key_open"] = max(emitted_rows) + if "start_key_closed" in row_ranges[0]: + row_ranges[0].pop("start_key_closed") + return {"row_keys": adjusted_keys, "row_ranges": row_ranges} async def read_rows( self, From 2a267971f43a98f064aa418e255eca1fd84f4932 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 09:50:28 -0700 Subject: [PATCH 158/349] updated Row and Cell class names --- google/cloud/bigtable/row_merger.py | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 36519f9a0..038c18d1c 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -15,7 +15,7 @@ from __future__ import annotations from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.row_response import RowResponse, CellResponse +from google.cloud.bigtable.row import Row, Cell import asyncio from abc import ABC, abstractmethod @@ -37,7 +37,7 @@ class InvalidChunk(RuntimeError): class RowMerger: """ RowMerger takes in a stream of ReadRows chunks - and processes them into a stream of RowResponses. + and processes them into a stream of Rows. RowMerger can wrap the stream directly, or use a cache to decouple the producer from the consumer @@ -51,7 +51,7 @@ def __init__(self): async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] - ) -> AsyncGenerator[RowResponse, None]: + ) -> AsyncGenerator[Row, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows @@ -92,7 +92,7 @@ async def merge_row_stream_with_cache( self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, - ) -> AsyncGenerator[RowResponse, None]: + ) -> AsyncGenerator[Row, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows, with a local cache to decouple the producer from the consumer @@ -109,7 +109,7 @@ async def merge_row_stream_with_cache( """ if max_cache_size is None: max_cache_size = -1 - cache: asyncio.Queue[RowResponse] = asyncio.Queue(max_cache_size) + cache: asyncio.Queue[Row] = asyncio.Queue(max_cache_size) stream_task = asyncio.create_task( self._generator_to_cache(cache, self.merge_row_stream(request_generator)) @@ -133,7 +133,7 @@ async def merge_row_stream_with_cache( class StateMachine: """ - State Machine converts chunks into RowResponses + State Machine converts chunks into Rows Chunks are added to the state machine via handle_chunk, which transitions the state machine through the various states. 
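As an aside for readers following the merger rename above: the chunk-to-row assembly that RowMerger and StateMachine perform can be pictured with a much smaller, self-contained sketch. FakeChunk, FakeRow, and merge_chunks below are illustrative names only, a synchronous stand-in rather than the library's real classes or API:

from dataclasses import dataclass, field

@dataclass
class FakeChunk:
    row_key: bytes
    qualifier: bytes
    value: bytes
    commit_row: bool = False

@dataclass
class FakeRow:
    row_key: bytes
    cells: list = field(default_factory=list)

def merge_chunks(chunks):
    """Accumulate cell chunks until a commit marker completes a row."""
    current = None
    for chunk in chunks:
        if current is None:
            current = FakeRow(chunk.row_key)
        current.cells.append((chunk.qualifier, chunk.value))
        if chunk.commit_row:
            yield current          # a row is only emitted once it is committed
            current = None
    if current is not None:
        raise RuntimeError("stream ended with a partial row")

rows = list(merge_chunks([
    FakeChunk(b"r1", b"q1", b"v1"),
    FakeChunk(b"r1", b"q2", b"v2", commit_row=True),
]))
assert rows[0].row_key == b"r1" and len(rows[0].cells) == 2

The real StateMachine layers validation (key ordering, duplicate keys, reset handling) on top of this basic accumulate-then-commit loop.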
@@ -174,7 +174,7 @@ def is_terminal_state(self) -> bool: """ return isinstance(self.current_state, AWAITING_NEW_ROW) - def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> RowResponse: + def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: """ Called by RowMerger to notify the state machine of a scan heartbeat @@ -184,15 +184,15 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> RowResponse: raise InvalidChunk("Last scanned row key is out of order") if not isinstance(self.current_state, AWAITING_NEW_ROW): raise InvalidChunk("Last scanned row key received in invalid state") - scan_marker = RowResponse(last_scanned_row_key, []) + scan_marker = Row(last_scanned_row_key, []) self._handle_complete_row(scan_marker) return scan_marker - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: """ Called by RowMerger to process a new chunk - Returns a RowResponse if the chunk completes a row, otherwise returns None + Returns a Row if the chunk completes a row, otherwise returns None """ if chunk.row_key in self.completed_row_keys: raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") @@ -219,7 +219,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> RowResponse | None: # row is not complete, return None return None - def _handle_complete_row(self, complete_row: RowResponse) -> None: + def _handle_complete_row(self, complete_row: Row) -> None: """ Complete row, update seen keys, and move back to AWAITING_NEW_ROW @@ -383,9 +383,9 @@ def __init__(self): def reset(self) -> None: """called when the current in progress row should be dropped""" self.current_key: bytes | None = None - self.working_cell: CellResponse | None = None + self.working_cell: Cell | None = None self.working_value: bytearray | None = None - self.completed_cells: List[CellResponse] = [] + self.completed_cells: List[Cell] = [] def start_row(self, key: bytes) -> None: """Called to start a new row. 
This will be called once per row""" @@ -413,7 +413,7 @@ def start_cell( if self.current_key is None: raise InvalidChunk("start_cell called without a row") self.working_value = bytearray() - self.working_cell = CellResponse( + self.working_cell = Cell( b"", self.current_key, family, qualifier, timestamp_micros, labels ) @@ -432,10 +432,10 @@ def finish_cell(self) -> None: self.working_cell = None self.working_value = None - def finish_row(self) -> RowResponse: + def finish_row(self) -> Row: """called once per row to signal that all cells have been processed (unless reset)""" if self.current_key is None: raise InvalidChunk("No row in progress") - new_row = RowResponse(self.current_key, self.completed_cells) + new_row = Row(self.current_key, self.completed_cells) self.reset() return new_row From bcd394f1541f3144de3f18ea75b79a75f4eb1537 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 10:01:52 -0700 Subject: [PATCH 159/349] fixed tests --- tests/unit/test_read_rows_acceptance.py | 4 ++-- tests/unit/test_row_merger.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 25a288150..15068d13c 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -6,7 +6,7 @@ from google.cloud.bigtable_v2 import ReadRowsResponse from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk -from google.cloud.bigtable.row_response import RowResponse +from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile @@ -20,7 +20,7 @@ def parse_readrows_acceptance_tests(): return test_json.read_rows_tests -def extract_results_from_row(row: RowResponse): +def extract_results_from_row(row: Row): results = [] for family, col, cells in row.items(): for cell in cells: diff --git a/tests/unit/test_row_merger.py b/tests/unit/test_row_merger.py index bd6e5d3d0..45dee8758 100644 --- a/tests/unit/test_row_merger.py +++ b/tests/unit/test_row_merger.py @@ -56,13 +56,14 @@ def test_start_row(self): def test_start_cell(self): # test with no family - with self.assertRaises(InvalidChunk): - self._make_one().start_cell('', + with self.assertRaises(InvalidChunk) as e: + self._make_one().start_cell('', TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + self.assertEqual(str(e.exception), 'Missing family for a new cell') # test with no row with self.assertRaises(InvalidChunk) as e: row_builder = self._make_one() row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - self.assertEqual(e.exception.message, 'start_cell called without a row') + self.assertEqual(str(e.exception), 'start_cell called without a row') def test_cell_value_no_cell(self): pass From 037af0db861b38669977523e2ee519cff5bcd3b1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 10:02:40 -0700 Subject: [PATCH 160/349] added last scanned row class --- google/cloud/bigtable/row.py | 15 +++++++++++++++ google/cloud/bigtable/row_merger.py | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 8231e324e..791899c2b 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -281,6 +281,21 @@ def __ne__(self, other) -> bool: return not self == other +class _LastScannedRow(Row): + """A value used to indicate a scanned row that is not returned as part of + a query. 
+ + This is used internally to indicate progress in a scan, and improve retry + performance. It is not intended to be used directly by users. + """ + + def __init__(self, row_key): + super().__init__(row_key, []) + + def __eq__(self, other): + return isinstance(other, _LastScannedRow) + + @total_ordering class Cell: """ diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 038c18d1c..a711a5cf2 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -15,7 +15,7 @@ from __future__ import annotations from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.row import Row, Cell +from google.cloud.bigtable.row import Row, Cell, _LastScannedRow import asyncio from abc import ABC, abstractmethod @@ -184,7 +184,7 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: raise InvalidChunk("Last scanned row key is out of order") if not isinstance(self.current_state, AWAITING_NEW_ROW): raise InvalidChunk("Last scanned row key received in invalid state") - scan_marker = Row(last_scanned_row_key, []) + scan_marker = _LastScannedRow(last_scanned_row_key) self._handle_complete_row(scan_marker) return scan_marker From e17d9bc075dbdbcb4f6bf3faba68e1662a2a2e37 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 10:02:56 -0700 Subject: [PATCH 161/349] ran blacken --- tests/unit/test_row_merger.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_row_merger.py b/tests/unit/test_row_merger.py index 45dee8758..66ed852ac 100644 --- a/tests/unit/test_row_merger.py +++ b/tests/unit/test_row_merger.py @@ -3,25 +3,28 @@ from google.cloud.bigtable.row_merger import InvalidChunk -TEST_FAMILY = 'family_name' -TEST_QUALIFIER = b'column_qualifier' +TEST_FAMILY = "family_name" +TEST_QUALIFIER = b"column_qualifier" TEST_TIMESTAMP = 123456789 -TEST_LABELS = ['label1', 'label2'] +TEST_LABELS = ["label1", "label2"] + class TestRowMerger(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable.row_merger import RowMerger + return RowMerger def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) -class TestStateMachine(unittest.TestCase): +class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable.row_merger import StateMachine + return StateMachine def _make_one(self, *args, **kwargs): @@ -31,18 +34,19 @@ def _make_one(self, *args, **kwargs): class TestState(unittest.TestCase): pass -class TestRowBuilder(unittest.TestCase): +class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable.row_merger import RowBuilder + return RowBuilder def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor(self): - with mock.patch('google.cloud.bigtable.row_merger.RowBuilder.reset') as reset: + with mock.patch("google.cloud.bigtable.row_merger.RowBuilder.reset") as reset: self._make_one() reset.assert_called_once() row_builder = self._make_one() @@ -57,13 +61,15 @@ def test_start_row(self): def test_start_cell(self): # test with no family with self.assertRaises(InvalidChunk) as e: - self._make_one().start_cell('', TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - self.assertEqual(str(e.exception), 'Missing family for a new cell') + self._make_one().start_cell("", TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + 
self.assertEqual(str(e.exception), "Missing family for a new cell") # test with no row with self.assertRaises(InvalidChunk) as e: row_builder = self._make_one() - row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - self.assertEqual(str(e.exception), 'start_cell called without a row') + row_builder.start_cell( + TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS + ) + self.assertEqual(str(e.exception), "start_cell called without a row") def test_cell_value_no_cell(self): pass @@ -85,4 +91,3 @@ def finish_row_no_row(self): def test_reset(self): pass - From b3d977d8648968b7cb7a9ff56570f078ca445659 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 10:08:00 -0700 Subject: [PATCH 162/349] handle last scanned rows --- google/cloud/bigtable/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 87e676a62..719c9890f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -28,6 +28,7 @@ from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.row import Row + from google.cloud.bigtable.row import _LastScannedRow from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter @@ -200,7 +201,9 @@ async def _read_rows_retryable( merger = RowMerger() async for row in merger.merge_row_stream(gapic_stream_handler): if row.row_key not in emitted_rows: - yield row + if not isinstance(row, _LastScannedRow): + # last scanned rows are not emitted + yield row emitted_rows.add(row.row_key) def _revise_rowset( From 1fba6eaa7b3d121f93b5a3aadc9631e09844b19e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 11:32:44 -0700 Subject: [PATCH 163/349] updated add_keys --- google/cloud/bigtable/read_rows_query.py | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 0cec29718..2d8e5d895 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -76,7 +76,7 @@ class ReadRowsQuery: def __init__( self, - row_keys: list[str | bytes] | str | bytes | None = None, + row_key: list[str | bytes] | str | bytes | None = None, row_ranges: list[RowRange] | RowRange | None = None, limit: int | None = None, row_filter: RowFilter | dict[str, Any] | None = None, @@ -85,7 +85,9 @@ def __init__( Create a new ReadRowsQuery Args: - - row_keys: a list of row keys to include in the query + - row_keys: row keys to include in the query + a query can contain multiple keys, but ranges should be preferred + - row_ranges: ranges of rows to include in the query - limit: the maximum number of rows to return. 
None or 0 means no limit default: None (no limit) - row_filter: a RowFilter to apply to the query @@ -95,7 +97,8 @@ def __init__( for range in row_ranges: self.row_ranges.append(range) if row_keys: - self.add_rows(row_keys) + for k in row_keys: + self.add_key(k) self.limit: int | None = limit self.filter: RowFilter | dict[str, Any] = row_filter @@ -138,27 +141,24 @@ def set_filter( self._filter = row_filter return self - def add_rows(self, row_keys: list[str | bytes] | str | bytes) -> ReadRowsQuery: + def add_key(self, row_key: str | bytes) -> ReadRowsQuery: """ - Add a list of row keys to this query + Add a row key to this query + + A query can contain multiple keys, but ranges should be preferred Args: - - row_keys: a list of row keys to add to this query + - row_key: a key to add to this query Returns: - a reference to this query for chaining Raises: - ValueError if an input is not a string or bytes """ - if not isinstance(row_keys, list): - row_keys = [row_keys] - update_set = set() - for k in row_keys: - if isinstance(k, str): - k = k.encode() - elif not isinstance(k, bytes): - raise ValueError("row_keys must be strings or bytes") - update_set.add(k) - self.row_keys.update(update_set) + if isinstance(row_key, str): + row_key = row_key.encode() + elif not isinstance(row_key, bytes): + raise ValueError("row_key must be string or bytes") + self.row_keys.add(row_key) return self def add_range( From c4f82b049ff05dc226193ca83bbec2e4cb3534fc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 11:35:29 -0700 Subject: [PATCH 164/349] removed chaining --- google/cloud/bigtable/read_rows_query.py | 53 +++++------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 2d8e5d895..1df66658b 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -29,12 +29,13 @@ class _RangePoint: is_inclusive: bool @dataclass -class RowRange +class RowRange: start: _RangePoint | None end: _RangePoint | None - def __init__(self, - start_key: str | bytes | None = None, + def __init__( + self, + start_key: str | bytes | None = None, end_key: str | bytes | None = None, start_is_inclusive: bool | None = None, end_is_inclusive: bool | None = None, @@ -102,7 +103,8 @@ def __init__( self.limit: int | None = limit self.filter: RowFilter | dict[str, Any] = row_filter - def set_limit(self, new_limit: int | None): + @property + def limit(self, new_limit: int | None): """ Set the maximum number of rows to return by this query. 
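For context on the chaining removal in this patch: the end state reached a few commits later is a plain validated property rather than a set_* method returning self. A minimal standalone sketch of that pattern, using an illustrative QuerySketch class rather than the real ReadRowsQuery:

class QuerySketch:
    def __init__(self, limit=None):
        self.limit = limit          # routed through the setter below

    @property
    def limit(self):
        return self._limit

    @limit.setter
    def limit(self, new_limit):
        if new_limit is not None and new_limit < 0:
            raise ValueError("limit must be >= 0")
        self._limit = new_limit

q = QuerySketch(limit=10)
q.limit = 0                         # accepted
try:
    q.limit = -1                    # rejected by the setter
except ValueError as exc:
    assert "limit must be >= 0" in str(exc)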
@@ -118,11 +120,11 @@ def set_limit(self, new_limit: int | None): if new_limit is not None and new_limit < 0: raise ValueError("limit must be >= 0") self._limit = new_limit - return self - def set_filter( + @property + def filter( self, row_filter: RowFilter | dict[str, Any] | None - ) -> ReadRowsQuery: + ): """ Set a RowFilter to apply to this query @@ -139,9 +141,8 @@ def set_filter( ): raise ValueError("row_filter must be a RowFilter or dict") self._filter = row_filter - return self - def add_key(self, row_key: str | bytes) -> ReadRowsQuery: + def add_key(self, row_key: str | bytes): """ Add a row key to this query @@ -159,7 +160,6 @@ def add_key(self, row_key: str | bytes) -> ReadRowsQuery: elif not isinstance(row_key, bytes): raise ValueError("row_key must be string or bytes") self.row_keys.add(row_key) - return self def add_range( self, @@ -167,7 +167,7 @@ def add_range( end_key: str | bytes | None = None, start_is_inclusive: bool | None = None, end_is_inclusive: bool | None = None, - ) -> ReadRowsQuery: + ): """ Add a range of row keys to this query. @@ -185,7 +185,6 @@ def add_range( start_key, end_key, start_is_inclusive, end_is_inclusive ) self.row_ranges.append(new_range) - return self def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: """ @@ -227,33 +226,3 @@ def to_dict(self) -> dict[str, Any]: if self.limit is not None: final_dict["rows_limit"] = self.limit return final_dict - - # Support limit and filter as properties - - @property - def limit(self) -> int | None: - """ - Getter implementation for limit property - """ - return self._limit - - @limit.setter - def limit(self, new_limit: int | None): - """ - Setter implementation for limit property - """ - self.set_limit(new_limit) - - @property - def filter(self): - """ - Getter implementation for filter property - """ - return self._filter - - @filter.setter - def filter(self, row_filter: RowFilter | dict[str, Any] | None): - """ - Setter implementation for filter property - """ - self.set_filter(row_filter) From caca14ccf10567df2400822abb9d481869b8e3b4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 11:42:54 -0700 Subject: [PATCH 165/349] improved to_dicts --- google/cloud/bigtable/read_rows_query.py | 26 +++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 1df66658b..8d64371c3 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -70,6 +70,17 @@ def __init__( else None ) + def _to_dict(self) -> dict[str, bytes]: + """Converts this object to a dictionary""" + output = {} + if self.start is not None: + key = "start_key_closed" if self.start.is_inclusive else "start_key_open" + output[key] = self.start.key + if self.end is not None: + key = "end_key_closed" if self.end.is_inclusive else "end_key_open" + output[key] = self.end.key + return output + class ReadRowsQuery: """ Class to encapsulate details of a read row request @@ -77,7 +88,7 @@ class ReadRowsQuery: def __init__( self, - row_key: list[str | bytes] | str | bytes | None = None, + row_keys: list[str | bytes] | str | bytes | None = None, row_ranges: list[RowRange] | RowRange | None = None, limit: int | None = None, row_filter: RowFilter | dict[str, Any] | None = None, @@ -197,21 +208,12 @@ def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery """ raise NotImplementedError - def to_dict(self) -> dict[str, Any]: + def 
_to_dict(self) -> dict[str, Any]: """ Convert this query into a dictionary that can be used to construct a ReadRowsRequest protobuf """ - ranges = [] - for start, end in self.row_ranges: - new_range = {} - if start is not None: - key = "start_key_closed" if start.is_inclusive else "start_key_open" - new_range[key] = start.key - if end is not None: - key = "end_key_closed" if end.is_inclusive else "end_key_open" - new_range[key] = end.key - ranges.append(new_range) + ranges = [r._to_dict() for r in self.row_ranges] row_keys = list(self.row_keys) row_keys.sort() row_set = {"row_keys": row_keys, "row_ranges": ranges} From 5f9ce85b27ab09d950d053430d6eba5802cc594b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 11:59:28 -0700 Subject: [PATCH 166/349] improving row_ranges --- google/cloud/bigtable/read_rows_query.py | 28 +++++++++--------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 8d64371c3..4d8eae42a 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -174,28 +174,17 @@ def add_key(self, row_key: str | bytes): def add_range( self, - start_key: str | bytes | None = None, - end_key: str | bytes | None = None, - start_is_inclusive: bool | None = None, - end_is_inclusive: bool | None = None, + row_range: RowRange | dict[str, bytes], ): """ Add a range of row keys to this query. Args: - - start_key: the start of the range - if None, start_key is interpreted as the empty string, inclusive - - end_key: the end of the range - if None, end_key is interpreted as the infinite row key, exclusive - - start_is_inclusive: if True, the start key is included in the range - defaults to True if None. Must not be included if start_key is None - - end_is_inclusive: if True, the end key is included in the range - defaults to False if None. 
Must not be included if end_key is None + - row_range: a range of row keys to add to this query + Can be a RowRange object or a dict representation in + RowRange proto format """ - new_range = RowRange( - start_key, end_key, start_is_inclusive, end_is_inclusive - ) - self.row_ranges.append(new_range) + self.row_ranges.append(row_range) def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: """ @@ -213,10 +202,13 @@ def _to_dict(self) -> dict[str, Any]: Convert this query into a dictionary that can be used to construct a ReadRowsRequest protobuf """ - ranges = [r._to_dict() for r in self.row_ranges] + row_ranges = [] + for r in self.row_ranges: + dict_range = r._to_dict() if isinstance(r, RowRange) else r + row_ranges.append(dict_range) row_keys = list(self.row_keys) row_keys.sort() - row_set = {"row_keys": row_keys, "row_ranges": ranges} + row_set = {"row_keys": row_keys, "row_ranges": row_ranges} final_dict: dict[str, Any] = { "rows": row_set, } From 8e5f60a5e25dcd2b5eb900e209c74b0fd7d50682 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 12:03:29 -0700 Subject: [PATCH 167/349] fixed properties --- google/cloud/bigtable/read_rows_query.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 4d8eae42a..d8237f9f2 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -115,6 +115,10 @@ def __init__( self.filter: RowFilter | dict[str, Any] = row_filter @property + def limit(self) -> int | None: + return self._limit + + @property.setter def limit(self, new_limit: int | None): """ Set the maximum number of rows to return by this query. @@ -133,6 +137,10 @@ def limit(self, new_limit: int | None): self._limit = new_limit @property + def filter(self) -> RowFilter | dict[str, Any]: + return self._filter + + @property.setter def filter( self, row_filter: RowFilter | dict[str, Any] | None ): From 57184c18fec7e455e99bbbc2017f64c0b91edff4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 12:03:37 -0700 Subject: [PATCH 168/349] added type checking to range --- google/cloud/bigtable/read_rows_query.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index d8237f9f2..96e4bdc4a 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -192,6 +192,12 @@ def add_range( Can be a RowRange object or a dict representation in RowRange proto format """ + if not ( + isinstance(row_range, dict) + or isinstance(row_range, RowRange) + or row_range is None + ): + raise ValueError("row_range must be a RowRange or dict") self.row_ranges.append(row_range) def shard(self, shard_keys: "RowKeySamples" | None = None) -> list[ReadRowsQuery]: From 3eda7f4a6c8f97e549f75aaf2118c24658c24fde Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:04:45 -0700 Subject: [PATCH 169/349] got tests passing --- google/cloud/bigtable/__init__.py | 2 + google/cloud/bigtable/read_rows_query.py | 17 +- tests/unit/test_read_rows_query.py | 245 +++++++++++++---------- 3 files changed, 156 insertions(+), 108 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index daa562c0c..251e41e42 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigtable.client import Table from 
google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.read_rows_query import RowRange from google.cloud.bigtable.row_response import RowResponse from google.cloud.bigtable.row_response import CellResponse @@ -43,6 +44,7 @@ "Table", "RowKeySamples", "ReadRowsQuery", + "RowRange", "MutationsBatcher", "Mutation", "BulkMutationsEntry", diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 96e4bdc4a..01b62507f 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -91,7 +91,7 @@ def __init__( row_keys: list[str | bytes] | str | bytes | None = None, row_ranges: list[RowRange] | RowRange | None = None, limit: int | None = None, - row_filter: RowFilter | dict[str, Any] | None = None, + row_filter: RowFilter | None = None, ): """ Create a new ReadRowsQuery @@ -105,10 +105,15 @@ def __init__( - row_filter: a RowFilter to apply to the query """ self.row_keys: set[bytes] = set() - self.row_ranges: list[RowRange] = [] - for range in row_ranges: - self.row_ranges.append(range) + self.row_ranges: list[RowRange | dict[str, bytes]] = [] + if row_ranges: + if isinstance(row_ranges, RowRange): + row_ranges = [row_ranges] + for r in row_ranges: + self.add_range(r) if row_keys: + if not isinstance(row_keys, list): + row_keys = [row_keys] for k in row_keys: self.add_key(k) self.limit: int | None = limit @@ -118,7 +123,7 @@ def __init__( def limit(self) -> int | None: return self._limit - @property.setter + @limit.setter def limit(self, new_limit: int | None): """ Set the maximum number of rows to return by this query. @@ -140,7 +145,7 @@ def limit(self, new_limit: int | None): def filter(self) -> RowFilter | dict[str, Any]: return self._filter - @property.setter + @filter.setter def filter( self, row_filter: RowFilter | dict[str, Any] | None ): diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index 822a75384..b4954b261 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -19,6 +19,95 @@ b"row_key_2", ] +class TestRowRange(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.read_rows_query import RowRange + return RowRange + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_start_end(self): + row_range = self._make_one("test_row", "test_row2") + self.assertEqual(row_range.start.key, "test_row".encode()) + self.assertEqual(row_range.end.key, "test_row2".encode()) + self.assertEqual(row_range.start.is_inclusive, True) + self.assertEqual(row_range.end.is_inclusive, False) + + def test_ctor_start_only(self): + row_range = self._make_one("test_row3") + self.assertEqual(row_range.start.key, "test_row3".encode()) + self.assertEqual(row_range.start.is_inclusive, True) + self.assertEqual(row_range.end, None) + + def test_ctor_end_only(self): + row_range = self._make_one(end_key="test_row4") + self.assertEqual(row_range.end.key, "test_row4".encode()) + self.assertEqual(row_range.end.is_inclusive, False) + self.assertEqual(row_range.start, None) + + def test_ctor_inclusive_flags(self): + row_range = self._make_one("test_row5", "test_row6", False, True) + self.assertEqual(row_range.start.key, "test_row5".encode()) + self.assertEqual(row_range.end.key, "test_row6".encode()) + self.assertEqual(row_range.start.is_inclusive, False) + self.assertEqual(row_range.end.is_inclusive, True) + + def test_ctor_defaults(self): + row_range 
= self._make_one() + self.assertEqual(row_range.start, None) + self.assertEqual(row_range.end, None) + + def test_ctor_flags_only(self): + with self.assertRaises(ValueError) as exc: + self._make_one(start_is_inclusive=True, end_is_inclusive=True) + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must be set with start_key",), + ) + with self.assertRaises(ValueError) as exc: + self._make_one(start_is_inclusive=False, end_is_inclusive=False) + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must be set with start_key",), + ) + with self.assertRaises(ValueError) as exc: + self._make_one(start_is_inclusive=False) + self.assertEqual( + exc.exception.args, + ("start_is_inclusive must be set with start_key",), + ) + with self.assertRaises(ValueError) as exc: + self._make_one(end_is_inclusive=True) + self.assertEqual( + exc.exception.args, ("end_is_inclusive must be set with end_key",) + ) + + def test_ctor_invalid_keys(self): + # test with invalid keys + with self.assertRaises(ValueError) as exc: + self._make_one(1, "2") + self.assertEqual(exc.exception.args, ("start_key must be a string or bytes",)) + with self.assertRaises(ValueError) as exc: + self._make_one("1", 2) + self.assertEqual(exc.exception.args, ("end_key must be a string or bytes",)) + + def test__to_dict_defaults(self): + row_range = self._make_one("test_row", "test_row2") + expected = { + "start_key_closed": b"test_row", + "end_key_open": b"test_row2", + } + self.assertEqual(row_range._to_dict(), expected) + + def test__to_dict_inclusive_flags(self): + row_range = self._make_one("test_row", "test_row2", False, True) + expected = { + "start_key_open": b"test_row", + "end_key_closed": b"test_row2", + } + self.assertEqual(row_range._to_dict(), expected) class TestReadRowsQuery(unittest.TestCase): @staticmethod @@ -60,15 +149,13 @@ def test_set_filter(self): filter1 = RowFilterChain() query = self._make_one() self.assertEqual(query.filter, None) - result = query.set_filter(filter1) + query.filter = filter1 self.assertEqual(query.filter, filter1) - self.assertEqual(result, query) filter2 = RowFilterChain() - result = query.set_filter(filter2) + query.filter = filter2 self.assertEqual(query.filter, filter2) - result = query.set_filter(None) + query.filter = None self.assertEqual(query.filter, None) - self.assertEqual(result, query) query.filter = RowFilterChain() self.assertEqual(query.filter, RowFilterChain()) with self.assertRaises(ValueError) as exc: @@ -85,10 +172,9 @@ def test_set_filter_dict(self): filter1_dict = filter1.to_dict() query = self._make_one() self.assertEqual(query.filter, None) - result = query.set_filter(filter1_dict) + query.filter = filter1_dict self.assertEqual(query.filter, filter1_dict) - self.assertEqual(result, query) - output = query.to_dict() + output = query._to_dict() self.assertEqual(output["filter"], filter1_dict) proto_output = ReadRowsRequest(**output) self.assertEqual(proto_output.filter, filter1._to_pb()) @@ -99,63 +185,58 @@ def test_set_filter_dict(self): def test_set_limit(self): query = self._make_one() self.assertEqual(query.limit, None) - result = query.set_limit(10) + query.limit = 10 self.assertEqual(query.limit, 10) - self.assertEqual(result, query) query.limit = 9 self.assertEqual(query.limit, 9) - result = query.set_limit(0) + query.limit = 0 self.assertEqual(query.limit, 0) - self.assertEqual(result, query) with self.assertRaises(ValueError) as exc: - query.set_limit(-1) + query.limit = -1 self.assertEqual(exc.exception.args, ("limit must be >= 0",)) with 
self.assertRaises(ValueError) as exc: query.limit = -100 self.assertEqual(exc.exception.args, ("limit must be >= 0",)) - def test_add_rows_str(self): + def test_add_key_str(self): query = self._make_one() self.assertEqual(query.row_keys, set()) input_str = "test_row" - result = query.add_rows(input_str) + query.add_key(input_str) self.assertEqual(len(query.row_keys), 1) self.assertIn(input_str.encode(), query.row_keys) - self.assertEqual(result, query) input_str2 = "test_row2" - result = query.add_rows(input_str2) + query.add_key(input_str2) self.assertEqual(len(query.row_keys), 2) self.assertIn(input_str.encode(), query.row_keys) self.assertIn(input_str2.encode(), query.row_keys) - self.assertEqual(result, query) - def test_add_rows_bytes(self): + def test_add_key_bytes(self): query = self._make_one() self.assertEqual(query.row_keys, set()) input_bytes = b"test_row" - result = query.add_rows(input_bytes) + query.add_key(input_bytes) self.assertEqual(len(query.row_keys), 1) self.assertIn(input_bytes, query.row_keys) - self.assertEqual(result, query) input_bytes2 = b"test_row2" - result = query.add_rows(input_bytes2) + query.add_key(input_bytes2) self.assertEqual(len(query.row_keys), 2) self.assertIn(input_bytes, query.row_keys) self.assertIn(input_bytes2, query.row_keys) - self.assertEqual(result, query) def test_add_rows_batch(self): query = self._make_one() self.assertEqual(query.row_keys, set()) input_batch = ["test_row", b"test_row2", "test_row3"] - result = query.add_rows(input_batch) + for k in input_batch: + query.add_key(k) self.assertEqual(len(query.row_keys), 3) self.assertIn(b"test_row", query.row_keys) self.assertIn(b"test_row2", query.row_keys) self.assertIn(b"test_row3", query.row_keys) - self.assertEqual(result, query) # test adding another batch - query.add_rows(["test_row4", b"test_row5"]) + for k in ['test_row4', b"test_row5"]: + query.add_key(k) self.assertEqual(len(query.row_keys), 5) self.assertIn(input_batch[0].encode(), query.row_keys) self.assertIn(input_batch[1], query.row_keys) @@ -163,14 +244,14 @@ def test_add_rows_batch(self): self.assertIn(b"test_row4", query.row_keys) self.assertIn(b"test_row5", query.row_keys) - def test_add_rows_invalid(self): + def test_add_key_invalid(self): query = self._make_one() with self.assertRaises(ValueError) as exc: - query.add_rows(1) - self.assertEqual(exc.exception.args, ("row_keys must be strings or bytes",)) + query.add_key(1) + self.assertEqual(exc.exception.args, ("row_key must be string or bytes",)) with self.assertRaises(ValueError) as exc: - query.add_rows(["s", 0]) - self.assertEqual(exc.exception.args, ("row_keys must be strings or bytes",)) + query.add_key(["s"]) + self.assertEqual(exc.exception.args, ("row_key must be string or bytes",)) def test_duplicate_rows(self): # should only hold one of each input key @@ -181,82 +262,38 @@ def test_duplicate_rows(self): self.assertIn(key_1, query.row_keys) self.assertIn(key_2, query.row_keys) key_3 = "test_row3" - query.add_rows([key_3 for _ in range(10)]) + for i in range(10): + query.add_key(key_3) self.assertEqual(len(query.row_keys), 3) def test_add_range(self): - # test with start and end keys + from google.cloud.bigtable.read_rows_query import RowRange query = self._make_one() self.assertEqual(query.row_ranges, []) - result = query.add_range("test_row", "test_row2") + input_range = RowRange(start_key=b"test_row") + query.add_range(input_range) self.assertEqual(len(query.row_ranges), 1) - self.assertEqual(query.row_ranges[0][0].key, "test_row".encode()) - 
self.assertEqual(query.row_ranges[0][1].key, "test_row2".encode()) - self.assertEqual(query.row_ranges[0][0].is_inclusive, True) - self.assertEqual(query.row_ranges[0][1].is_inclusive, False) - self.assertEqual(result, query) - # test with start key only - result = query.add_range("test_row3") + self.assertEqual(query.row_ranges[0], input_range) + input_range2 = RowRange(start_key=b"test_row2") + query.add_range(input_range2) self.assertEqual(len(query.row_ranges), 2) - self.assertEqual(query.row_ranges[1][0].key, "test_row3".encode()) - self.assertEqual(query.row_ranges[1][1], None) - self.assertEqual(result, query) - # test with end key only - result = query.add_range(start_key=None, end_key="test_row5") - self.assertEqual(len(query.row_ranges), 3) - self.assertEqual(query.row_ranges[2][0], None) - self.assertEqual(query.row_ranges[2][1].key, "test_row5".encode()) - self.assertEqual(query.row_ranges[2][1].is_inclusive, False) - # test with start and end keys and inclusive flags - result = query.add_range(b"test_row6", b"test_row7", False, True) - self.assertEqual(len(query.row_ranges), 4) - self.assertEqual(query.row_ranges[3][0].key, b"test_row6") - self.assertEqual(query.row_ranges[3][1].key, b"test_row7") - self.assertEqual(query.row_ranges[3][0].is_inclusive, False) - self.assertEqual(query.row_ranges[3][1].is_inclusive, True) - # test with nothing passed - result = query.add_range() - self.assertEqual(len(query.row_ranges), 5) - self.assertEqual(query.row_ranges[4][0], None) - self.assertEqual(query.row_ranges[4][1], None) - # test with inclusive flags only - with self.assertRaises(ValueError) as exc: - query.add_range(start_is_inclusive=True, end_is_inclusive=True) - self.assertEqual( - exc.exception.args, - ("start_is_inclusive must be set with start_key",), - ) - with self.assertRaises(ValueError) as exc: - query.add_range(start_is_inclusive=False, end_is_inclusive=False) - self.assertEqual( - exc.exception.args, - ("start_is_inclusive must be set with start_key",), - ) - with self.assertRaises(ValueError) as exc: - query.add_range(start_is_inclusive=False) - self.assertEqual( - exc.exception.args, - ("start_is_inclusive must be set with start_key",), - ) - with self.assertRaises(ValueError) as exc: - query.add_range(end_is_inclusive=True) - self.assertEqual( - exc.exception.args, ("end_is_inclusive must be set with end_key",) - ) - # test with invalid keys - with self.assertRaises(ValueError) as exc: - query.add_range(1, "2") - self.assertEqual(exc.exception.args, ("start_key must be a string or bytes",)) - with self.assertRaises(ValueError) as exc: - query.add_range("1", 2) - self.assertEqual(exc.exception.args, ("end_key must be a string or bytes",)) + self.assertEqual(query.row_ranges[0], input_range) + self.assertEqual(query.row_ranges[1], input_range2) + + def test_add_range_dict(self): + query = self._make_one() + self.assertEqual(query.row_ranges, []) + input_range = {"start_key_closed": b"test_row"} + query.add_range(input_range) + self.assertEqual(len(query.row_ranges), 1) + self.assertEqual(query.row_ranges[0], input_range) def test_to_dict_rows_default(self): # dictionary should be in rowset proto format from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest query = self._make_one() - output = query.to_dict() + output = query._to_dict() self.assertTrue(isinstance(output, dict)) self.assertEqual(len(output.keys()), 1) expected = {"rows": {"row_keys": [], "row_ranges": []}} @@ -272,17 +309,21 @@ def test_to_dict_rows_populated(self): # dictionary should be 
in rowset proto format from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.read_rows_query import RowRange row_filter = PassAllFilter(False) query = self._make_one(limit=100, row_filter=row_filter) - query.add_range("test_row", "test_row2") - query.add_range("test_row3") - query.add_range(start_key=None, end_key="test_row5") - query.add_range(b"test_row6", b"test_row7", False, True) - query.add_range() - query.add_rows(["test_row", b"test_row2", "test_row3"]) - query.add_rows(["test_row3", b"test_row4"]) - output = query.to_dict() + query.add_range(RowRange("test_row", "test_row2")) + query.add_range(RowRange("test_row3")) + query.add_range(RowRange(start_key=None, end_key="test_row5")) + query.add_range(RowRange(b"test_row6", b"test_row7", False, True)) + query.add_range(RowRange()) + query.add_key("test_row") + query.add_key(b"test_row2") + query.add_key("test_row3") + query.add_key(b"test_row3") + query.add_key(b"test_row4") + output = query._to_dict() self.assertTrue(isinstance(output, dict)) request_proto = ReadRowsRequest(**output) rowset_proto = request_proto.rows From 65f5a2ae0eb2e08dcb98e17542bef33f5feea51a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:08:18 -0700 Subject: [PATCH 170/349] blacken, mypy --- google/cloud/bigtable/read_rows_query.py | 16 +++++++--------- tests/unit/test_read_rows_query.py | 6 +++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 01b62507f..9704606d1 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -25,9 +25,11 @@ @dataclass class _RangePoint: """Model class for a point in a row range""" + key: row_key is_inclusive: bool + @dataclass class RowRange: start: _RangePoint | None @@ -65,9 +67,7 @@ def __init__( else None ) self.end = ( - _RangePoint(end_key, end_is_inclusive) - if end_key is not None - else None + _RangePoint(end_key, end_is_inclusive) if end_key is not None else None ) def _to_dict(self) -> dict[str, bytes]: @@ -81,6 +81,7 @@ def _to_dict(self) -> dict[str, bytes]: output[key] = self.end.key return output + class ReadRowsQuery: """ Class to encapsulate details of a read row request @@ -117,7 +118,7 @@ def __init__( for k in row_keys: self.add_key(k) self.limit: int | None = limit - self.filter: RowFilter | dict[str, Any] = row_filter + self.filter: RowFilter | dict[str, Any] | None = row_filter @property def limit(self) -> int | None: @@ -142,13 +143,11 @@ def limit(self, new_limit: int | None): self._limit = new_limit @property - def filter(self) -> RowFilter | dict[str, Any]: + def filter(self) -> RowFilter | dict[str, Any] | None: return self._filter @filter.setter - def filter( - self, row_filter: RowFilter | dict[str, Any] | None - ): + def filter(self, row_filter: RowFilter | dict[str, Any] | None): """ Set a RowFilter to apply to this query @@ -200,7 +199,6 @@ def add_range( if not ( isinstance(row_range, dict) or isinstance(row_range, RowRange) - or row_range is None ): raise ValueError("row_range must be a RowRange or dict") self.row_ranges.append(row_range) diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index b4954b261..aa690bc86 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -19,10 +19,12 @@ b"row_key_2", ] + class TestRowRange(unittest.TestCase): @staticmethod 
def _get_target_class(): from google.cloud.bigtable.read_rows_query import RowRange + return RowRange def _make_one(self, *args, **kwargs): @@ -109,6 +111,7 @@ def test__to_dict_inclusive_flags(self): } self.assertEqual(row_range._to_dict(), expected) + class TestReadRowsQuery(unittest.TestCase): @staticmethod def _get_target_class(): @@ -235,7 +238,7 @@ def test_add_rows_batch(self): self.assertIn(b"test_row2", query.row_keys) self.assertIn(b"test_row3", query.row_keys) # test adding another batch - for k in ['test_row4', b"test_row5"]: + for k in ["test_row4", b"test_row5"]: query.add_key(k) self.assertEqual(len(query.row_keys), 5) self.assertIn(input_batch[0].encode(), query.row_keys) @@ -268,6 +271,7 @@ def test_duplicate_rows(self): def test_add_range(self): from google.cloud.bigtable.read_rows_query import RowRange + query = self._make_one() self.assertEqual(query.row_ranges, []) input_range = RowRange(start_key=b"test_row") From 3e724dbcb00bc4baaab1504a2d96690dc258588f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:23:08 -0700 Subject: [PATCH 171/349] ran blacken --- google/cloud/bigtable/read_rows_query.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 9704606d1..9fd349d5f 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -196,10 +196,7 @@ def add_range( Can be a RowRange object or a dict representation in RowRange proto format """ - if not ( - isinstance(row_range, dict) - or isinstance(row_range, RowRange) - ): + if not (isinstance(row_range, dict) or isinstance(row_range, RowRange)): raise ValueError("row_range must be a RowRange or dict") self.row_ranges.append(row_range) From 45eadce8006dcfc302bad2328e644abebbc6ff64 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:39:45 -0700 Subject: [PATCH 172/349] improved API usage --- google/cloud/bigtable/client.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2da7274b6..abba58068 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -36,6 +36,7 @@ import google.auth.credentials from google.api_core import retry_async as retries +from google.api_core import exceptions as core_exceptions import google.auth._default from google.api_core import client_options as client_options_lib @@ -351,8 +352,6 @@ async def read_rows_stream( self, query: ReadRowsQuery | dict[str, Any], *, - shard: bool = False, - limit: int | None, cache_size_limit: int | None = None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, @@ -372,10 +371,6 @@ async def read_rows_stream( Args: - query: contains details about which rows to return - - shard: if True, will attempt to split up and distribute query to multiple - backend nodes in parallel - - limit: a limit on the number of rows to return. Actual limit will be - min(limit, query.limit) - cache_size: the number of rows to cache in memory. If None, no limits. Defaults to None - operation_timeout: the time budget for the entire operation, in seconds. 
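The timeout parameters being reworked in this patch layer an overall operation budget over per-attempt limits. The actual client routes retries through api_core's AsyncRetry; the sketch below is a library-independent illustration of the layering with plain asyncio, where flaky_attempt, call_with_deadlines, and the backoff constants are all assumed names and values, not the client's real ones:

import asyncio
import random

async def flaky_attempt():
    # stand-in for a single RPC attempt that sometimes fails transiently
    await asyncio.sleep(random.uniform(0, 0.2))
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return "ok"

async def call_with_deadlines(operation_timeout=2.0, per_request_timeout=0.5):
    loop = asyncio.get_running_loop()
    deadline = loop.time() + operation_timeout
    delay = 0.1
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError("operation_timeout exceeded")
        try:
            # each attempt is bounded by both the per-request and overall budgets
            return await asyncio.wait_for(
                flaky_attempt(), timeout=min(per_request_timeout, remaining)
            )
        except (ConnectionError, asyncio.TimeoutError):
            await asyncio.sleep(min(delay, max(deadline - loop.time(), 0)))
            delay = min(delay * 2, 1.0)  # exponential backoff, capped

print(asyncio.run(call_with_deadlines()))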
@@ -416,7 +411,7 @@ def on_error(exc): core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable ), - timeout=timeout, + timeout=operation_timeout, on_error=on_error, initial=0.1, multiplier=2, @@ -424,8 +419,8 @@ def on_error(exc): is_generator=True ) retryable_fn = retry(self._read_rows_retryable) - emitted_rows:Set[bytes] = set({}) - async for result in retryable_fn(requestm emmited_rows, operation_timeout): + emitted_rows:set[bytes] = set({}) + async for result in retryable_fn(request, emitted_rows, per_request_timeout): if isinstance(result, Row): yield result elif isinstance(result, Exception): @@ -433,9 +428,9 @@ def on_error(exc): async def _read_rows_retryable( - self, request:dict[str, Any], emitted_rows: set[bytes], operation_timeout=60.0, revise_on_retry=True - ) -> AsyncGenerator[Row, None]: - if revise_request_on_retry and len(emitted_rows) > 0: + self, request:dict[str, Any], emitted_rows: set[bytes], per_request_timeout=None, revise_on_retry=True, cache_size_limit=None, + ) -> AsyncIterable[Row, None]: + if revise_on_retry and len(emitted_rows) > 0: # if this is a retry, try to trim down the request to avoid ones we've already processed request["rows"] = self._revise_rowset( request.get("rows", None), emitted_rows @@ -443,10 +438,10 @@ async def _read_rows_retryable( gapic_stream_handler = await self._gapic_client.read_rows( request=request, app_profile_id=self.app_profile_id, - timeout=operation_timeout, + timeout=per_request_timeout, ) merger = RowMerger() - async for row in merger.merge_row_stream(gapic_stream_handler): + async for row in merger.merge_row_stream_with_cache(gapic_stream_handler, cache_size_limit): if row.row_key not in emitted_rows: if not isinstance(row, _LastScannedRow): # last scanned rows are not emitted From c06213ff3bbf88e46e933de7b471c8968c874453 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:41:20 -0700 Subject: [PATCH 173/349] use invalid chunk --- google/cloud/bigtable/client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index abba58068..3d5be8fb5 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -33,6 +33,7 @@ from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigtable.row_merger import RowMerger +from google.cloud.bigtable.row_merger import InvalidChunk import google.auth.credentials from google.api_core import retry_async as retries @@ -403,11 +404,10 @@ async def read_rows_stream( request["table_name"] = self._gapic_client.table_name(self.table_id) def on_error(exc): - print(f"RETRYING: {exc}") return exc retry = retries.AsyncRetry( predicate=retries.if_exception_type( - RuntimeError, + InvalidChunk, core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable ), @@ -423,8 +423,6 @@ def on_error(exc): async for result in retryable_fn(request, emitted_rows, per_request_timeout): if isinstance(result, Row): yield result - elif isinstance(result, Exception): - print(f"Exception: {result}") async def _read_rows_retryable( From 6e75a2fd33557db37688ee80a7b2a750973d84e0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 13:58:34 -0700 Subject: [PATCH 174/349] added per request timeouts --- google/cloud/bigtable/client.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 3d5be8fb5..3d2be8dc6 100644 --- 
a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -409,7 +409,7 @@ def on_error(exc): predicate=retries.if_exception_type( InvalidChunk, core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable + core_exceptions.ServiceUnavailable, ), timeout=operation_timeout, on_error=on_error, @@ -420,13 +420,13 @@ def on_error(exc): ) retryable_fn = retry(self._read_rows_retryable) emitted_rows:set[bytes] = set({}) - async for result in retryable_fn(request, emitted_rows, per_request_timeout): + async for result in retryable_fn(request, emitted_rows, per_request_timeout, per_request_timeout): if isinstance(result, Row): yield result async def _read_rows_retryable( - self, request:dict[str, Any], emitted_rows: set[bytes], per_request_timeout=None, revise_on_retry=True, cache_size_limit=None, + self, request:dict[str, Any], emitted_rows: set[bytes], per_request_timeout=None, per_row_timeout=None, revise_on_retry=True, cache_size_limit=None, ) -> AsyncIterable[Row, None]: if revise_on_retry and len(emitted_rows) > 0: # if this is a retry, try to trim down the request to avoid ones we've already processed @@ -439,12 +439,22 @@ async def _read_rows_retryable( timeout=per_request_timeout, ) merger = RowMerger() - async for row in merger.merge_row_stream_with_cache(gapic_stream_handler, cache_size_limit): - if row.row_key not in emitted_rows: - if not isinstance(row, _LastScannedRow): - # last scanned rows are not emitted - yield row - emitted_rows.add(row.row_key) + generator = merger.merge_row_stream_with_cache(gapic_stream_handler, cache_size_limit) + while True: + try: + row = await asyncio.wait_for(generator.__anext__(), timeout=per_row_timeout) + if row.row_key not in emitted_rows: + if not isinstance(row, _LastScannedRow): + # last scanned rows are not emitted + yield row + emitted_rows.add(row.row_key) + except asyncio.TimeoutError: + generator.close() + raise core_exceptions.DeadlineExceeded("per_row_timeout exceeded") + except StopAsyncIteration: + break + + def _revise_rowset( self, row_set: dict[str, Any]|None, emitted_rows: set[bytes] From a205e9390e5e688e3a799774d360e6e79959cbb8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 14:40:22 -0700 Subject: [PATCH 175/349] account for RequestStats --- google/cloud/bigtable/client.py | 3 ++- google/cloud/bigtable/row_merger.py | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index de4637289..53148467d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -164,7 +164,8 @@ async def read_rows_stream( ) merger = RowMerger() async for row in merger.merge_row_stream(gapic_stream_handler): - yield row + if isinstance(row, Row): + yield row async def read_rows( self, diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a711a5cf2..b1c98480e 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -15,6 +15,7 @@ from __future__ import annotations from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse +from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable.row import Row, Cell, _LastScannedRow import asyncio @@ -51,7 +52,7 @@ def __init__(self): async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] - ) -> AsyncGenerator[Row, None]: + ) -> AsyncGenerator[Row|RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set 
of Rows @@ -75,6 +76,9 @@ async def merge_row_stream( complete_row = self.state_machine.handle_chunk(chunk) if complete_row is not None: yield complete_row + # yield request stats if present + if response_pb.stats: + yield response_pb.stats if not self.state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") @@ -92,7 +96,7 @@ async def merge_row_stream_with_cache( self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, - ) -> AsyncGenerator[Row, None]: + ) -> AsyncGenerator[Row|RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows, with a local cache to decouple the producer from the consumer From ce3eb75030b059c374c4ba088237b112295aa87c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 14:52:29 -0700 Subject: [PATCH 176/349] added output generator wrapper --- google/cloud/bigtable/client.py | 39 ++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 53148467d..6b1a16647 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,12 +15,13 @@ from __future__ import annotations -from typing import Any, AsyncIterable, TYPE_CHECKING +from typing import Any, AsyncIterable, AsyncIterator, TYPE_CHECKING -from google.cloud.client import ClientWithProject +import time +from google.cloud.client import ClientWithProject from google.cloud.bigtable.row_merger import RowMerger - +from google.cloud.bigtable_v2.types import RequestStats import google.auth.credentials if TYPE_CHECKING: @@ -110,7 +111,7 @@ async def read_rows_stream( idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> AsyncIterable[Row]: + ) -> AsyncIterator[Row]: """ Returns a generator to asynchronously stream back row data. 
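The wrapper class added in this patch is essentially an async iterator that forwards rows, siphons off stats objects, and timestamps consumer activity. A self-contained sketch of that shape, where StatsItem and FilteringStream are illustrative stand-ins rather than the RequestStats and ReadRowsGenerator types themselves:

import asyncio
import time

class StatsItem:
    """Illustrative stand-in for a stats payload mixed into the stream."""

class FilteringStream:
    def __init__(self, inner):
        self._inner = inner
        self.stats = None
        self.last_interaction_time = time.monotonic()

    def __aiter__(self):
        # __aiter__ is synchronous and returns self
        return self

    async def __anext__(self):
        self.last_interaction_time = time.monotonic()
        while True:
            item = await self._inner.__anext__()  # StopAsyncIteration propagates
            if isinstance(item, StatsItem):
                self.stats = item                 # keep stats out of the row stream
                continue
            return item

async def demo():
    async def produce():
        yield "row-1"
        yield StatsItem()
        yield "row-2"
    stream = FilteringStream(produce())
    return [row async for row in stream], stream.stats

rows, stats = asyncio.run(demo())
assert rows == ["row-1", "row-2"] and isinstance(stats, StatsItem)

Recording a per-item timestamp like last_interaction_time is what makes an idle-timeout check possible on top of such a wrapper.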
@@ -162,10 +163,7 @@ async def read_rows_stream( app_profile_id=self.app_profile_id, timeout=operation_timeout, ) - merger = RowMerger() - async for row in merger.merge_row_stream(gapic_stream_handler): - if isinstance(row, Row): - yield row + return ReadRowsGenerator(gapic_stream_handler) async def read_rows( self, @@ -446,3 +444,28 @@ async def read_modify_write_row( - GoogleAPIError exceptions from grpc call """ raise NotImplementedError + + +class ReadRowsGenerator(): + """ + User-facing async generator for streaming read_rows responses + """ + + def __init__(self, gapic_stream:AsyncIterable["ReadRowsResponse"]): + merger = RowMerger() + self._inner_gen = merger.merge_row_stream(gapic_stream) + self.request_stats = None + self.last_interaction_time = time.time() + + async def __aiter__(self) -> AsyncIterator[Row]: + return self + + async def __anext__(self) -> Row: + self.last_interaction_time = time.time() + next_item = await self._inner_gen.__anext__() + while not isinstance(next_item, Row): + if isinstance(next_item, RequestStats): + self.request_stats = next_item + next_item = await self._inner_gen.__anext__() + return next_item + From 74029c90ff8f488beb678f34adfd3ab46afd50fb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 15:13:43 -0700 Subject: [PATCH 177/349] updated template --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 77 ++++++++++++------- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 1a5660273..10c309f7c 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 1a56602733106b80142d944885fc37374d13f9ef +Subproject commit 10c309f7cf88f403f02d12fa9c24b463db24aa8c diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 87b568c4c..296f7b9f7 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -41,31 +41,45 @@ from .base import BigtableTransport, DEFAULT_CLIENT_INFO from .grpc import BigtableGrpcTransport -class PooledMultiCallable(): - def __init__(self, channel_pool:"PooledChannel", *args, **kwargs): +class PooledMultiCallable: + def __init__(self, channel_pool: "PooledChannel", *args, **kwargs): self._init_args = args self._init_kwargs = kwargs self.next_channel_fn = channel_pool.next_channel + class PooledUnaryUnaryMultiCallable(PooledMultiCallable, aio.UnaryUnaryMultiCallable): def __call__(self, *args, **kwargs) -> aio.UnaryUnaryCall: - return self.next_channel_fn().unary_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().unary_unary( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) + class PooledUnaryStreamMultiCallable(PooledMultiCallable, aio.UnaryStreamMultiCallable): def __call__(self, *args, **kwargs) -> aio.UnaryStreamCall: - return self.next_channel_fn().unary_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().unary_stream( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) + class PooledStreamUnaryMultiCallable(PooledMultiCallable, aio.StreamUnaryMultiCallable): def __call__(self, *args, **kwargs) -> aio.StreamUnaryCall: - return self.next_channel_fn().stream_unary(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().stream_unary( + 
*self._init_args, **self._init_kwargs + )(*args, **kwargs) -class PooledStreamStreamMultiCallable(PooledMultiCallable, aio.StreamStreamMultiCallable): + +class PooledStreamStreamMultiCallable( + PooledMultiCallable, aio.StreamStreamMultiCallable +): def __call__(self, *args, **kwargs) -> aio.StreamStreamCall: - return self.next_channel_fn().stream_stream(*self._init_args, **self._init_kwargs)(*args, **kwargs) + return self.next_channel_fn().stream_stream( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) -class PooledChannel(aio.Channel): +class PooledChannel(aio.Channel): def __init__( self, pool_size: int = 3, @@ -76,9 +90,17 @@ def __init__( quota_project_id: Optional[str] = None, **kwargs, ): - self._pool : List[aio.Channel] = [] + self._pool: List[aio.Channel] = [] self._next_idx = 0 - self._create_channel = partial(grpc_helpers_async.create_channel, target=host, credentials=credentials, credentials_file=credentials_file, scopes=scopes, quota_project_id=quota_project_id, **kwargs) + self._create_channel = partial( + grpc_helpers_async.create_channel, + target=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) for i in range(pool_size): self._pool.append(self._create_channel()) @@ -147,6 +169,7 @@ async def replace_channel( await old_channel.close(grace=grace) return new_channel + class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): """Pooled gRPC AsyncIO backend transport for Bigtable. @@ -332,21 +355,21 @@ def __init__( ) self._quota_project_id = quota_project_id self._grpc_channel = type(self).create_channel( - pool_size, - self._host, - # use the credentials which are saved - credentials=self._credentials, - # Set ``credentials_file`` to ``None`` here as - # the credentials that we saved earlier should be used. - credentials_file=None, - scopes=self._scopes, - ssl_credentials=self._ssl_channel_credentials, - quota_project_id=self._quota_project_id, - options=[ - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ], - ) + pool_size, + self._host, + # use the credentials which are saved + credentials=self._credentials, + # Set ``credentials_file`` to ``None`` here as + # the credentials that we saved earlier should be used. + credentials_file=None, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=self._quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) # Wrap messages. 
This must be done after pool is populated self._prep_wrapped_messages(client_info) @@ -407,7 +430,6 @@ def read_rows( ) return self._stubs["read_rows"] - @property def sample_row_keys( self, @@ -578,7 +600,6 @@ def read_modify_write_row( ) return self._stubs["read_modify_write_row"] - @property def generate_initial_change_stream_partitions( self, @@ -641,8 +662,6 @@ def read_change_stream( ) return self._stubs["read_change_stream"] - - def close(self): return self.grpc_channel.close() From 7f2be30be8502716829f34dd796ccf9258bec178 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 16:08:40 -0700 Subject: [PATCH 178/349] got tests passing --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 39 ++- tests/unit/gapic/bigtable_v2/test_bigtable.py | 249 ++++++++++-------- 3 files changed, 164 insertions(+), 126 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 10c309f7c..4781d8cf9 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 10c309f7cf88f403f02d12fa9c24b463db24aa8c +Subproject commit 4781d8cf9fe5979522f5736e9a2a1d4525881a8e diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 296f7b9f7..83df179d9 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -214,8 +214,9 @@ def create_channel( quota_project_id: Optional[str] = None, **kwargs, ) -> aio.Channel: - """Create and return a gRPC AsyncIO channel object. + """Create and return a PooledChannel object, representing a pool of gRPC AsyncIO channels Args: + pool_size (int): the number of channels in the pool host (Optional[str]): The host for the channel to use. credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These @@ -233,7 +234,7 @@ def create_channel( kwargs (Optional[dict]): Keyword arguments, which are passed to the channel creation. Returns: - aio.Channel: A gRPC AsyncIO channel object. + PooledChannel: a channel pool object """ return PooledChannel( @@ -315,7 +316,7 @@ def __init__( if pool_size <= 0: raise ValueError(f"invalid pool_size: {pool_size}") self._ssl_channel_credentials = ssl_channel_credentials - self._stubs: Dict[Tuple[aio.Channel, str], Callable] = {} + self._stubs: Dict[str, Callable] = {} if api_mtls_endpoint: warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) @@ -370,12 +371,14 @@ def __init__( ("grpc.max_receive_message_length", -1), ], ) - # Wrap messages. This must be done after pool is populated + + # Wrap messages. This must be done after self._grpc_channel exists self._prep_wrapped_messages(client_info) @property def grpc_channel(self) -> aio.Channel: """Create the channel designed to connect to this service. + This property caches on the instance; repeated calls return the same channel. """ @@ -406,12 +409,14 @@ def read_rows( self, ) -> Callable[[bigtable.ReadRowsRequest], Awaitable[bigtable.ReadRowsResponse]]: r"""Return a callable for the read rows method over gRPC. + Streams back the contents of all requested rows in key order, optionally applying the same Reader filter to each. Depending on their size, rows and cells may be broken up across multiple responses, but atomicity of each row will still be preserved. See the ReadRowsResponse documentation for details. 
+ Returns: Callable[[~.ReadRowsRequest], Awaitable[~.ReadRowsResponse]]: @@ -436,12 +441,14 @@ def sample_row_keys( ) -> Callable[ [bigtable.SampleRowKeysRequest], Awaitable[bigtable.SampleRowKeysResponse] ]: - """Return a callable for the sample row keys method over gRPC. + r"""Return a callable for the sample row keys method over gRPC. + Returns a sample of row keys in the table. The returned row keys will delimit contiguous sections of the table of approximately equal size, which can be used to break up the data for distributed tasks like mapreduces. + Returns: Callable[[~.SampleRowKeysRequest], Awaitable[~.SampleRowKeysResponse]]: @@ -465,8 +472,10 @@ def mutate_row( self, ) -> Callable[[bigtable.MutateRowRequest], Awaitable[bigtable.MutateRowResponse]]: r"""Return a callable for the mutate row method over gRPC. + Mutates a row atomically. Cells already present in the row are left unchanged unless explicitly changed by ``mutation``. + Returns: Callable[[~.MutateRowRequest], Awaitable[~.MutateRowResponse]]: @@ -489,10 +498,12 @@ def mutate_row( def mutate_rows( self, ) -> Callable[[bigtable.MutateRowsRequest], Awaitable[bigtable.MutateRowsResponse]]: - """Return a callable for the mutate rows method over gRPC. + r"""Return a callable for the mutate rows method over gRPC. + Mutates multiple rows in a batch. Each individual row is mutated atomically as in MutateRow, but the entire batch is not executed atomically. + Returns: Callable[[~.MutateRowsRequest], Awaitable[~.MutateRowsResponse]]: @@ -518,9 +529,11 @@ def check_and_mutate_row( [bigtable.CheckAndMutateRowRequest], Awaitable[bigtable.CheckAndMutateRowResponse], ]: - """Return a callable for the check and mutate row method over gRPC. + r"""Return a callable for the check and mutate row method over gRPC. + Mutates a row atomically based on the output of a predicate Reader filter. + Returns: Callable[[~.CheckAndMutateRowRequest], Awaitable[~.CheckAndMutateRowResponse]]: @@ -545,10 +558,12 @@ def ping_and_warm( ) -> Callable[ [bigtable.PingAndWarmRequest], Awaitable[bigtable.PingAndWarmResponse] ]: - """Return a callable for the ping and warm method over gRPC. + r"""Return a callable for the ping and warm method over gRPC. + Warm up associated instance metadata for this connection. This call is not required but may be useful for connection keep-alive. + Returns: Callable[[~.PingAndWarmRequest], Awaitable[~.PingAndWarmResponse]]: @@ -574,7 +589,8 @@ def read_modify_write_row( [bigtable.ReadModifyWriteRowRequest], Awaitable[bigtable.ReadModifyWriteRowResponse], ]: - """Return a callable for the read modify write row method over gRPC. + r"""Return a callable for the read modify write row method over gRPC. + Modifies a row atomically on the server. The method reads the latest existing timestamp and value from the specified columns and writes a new entry based on @@ -582,6 +598,7 @@ def read_modify_write_row( the timestamp is the greater of the existing timestamp or the current server time. The method returns the new contents of all modified cells. + Returns: Callable[[~.ReadModifyWriteRowRequest], Awaitable[~.ReadModifyWriteRowResponse]]: @@ -609,10 +626,12 @@ def generate_initial_change_stream_partitions( ]: r"""Return a callable for the generate initial change stream partitions method over gRPC. + NOTE: This API is intended to be used by Apache Beam BigtableIO. Returns the current list of partitions that make up the table's change stream. The union of partitions will cover the entire keyspace. 
Partitions can be read with ``ReadChangeStream``. + Returns: Callable[[~.GenerateInitialChangeStreamPartitionsRequest], Awaitable[~.GenerateInitialChangeStreamPartitionsResponse]]: @@ -640,10 +659,12 @@ def read_change_stream( [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] ]: r"""Return a callable for the read change stream method over gRPC. + NOTE: This API is intended to be used by Apache Beam BigtableIO. Reads changes from a table's change stream. Changes will reflect both user-initiated mutations and mutations that are caused by garbage collection. + Returns: Callable[[~.ReadChangeStreamRequest], Awaitable[~.ReadChangeStreamResponse]]: diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 23250124c..9a10ba94a 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -744,26 +744,27 @@ def test_read_rows(request_type, transport: str = "grpc"): def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.read_rows(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "read_rows") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -991,26 +992,27 @@ def test_sample_row_keys(request_type, transport: str = "grpc"): def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.sample_row_keys(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "sample_row_keys") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -1237,26 +1239,27 @@ def test_mutate_row(request_type, transport: str = "grpc"): def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.mutate_row(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "mutate_row") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -1528,26 +1531,27 @@ def test_mutate_rows(request_type, transport: str = "grpc"): def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.mutate_rows(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "mutate_rows") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -1789,26 +1793,27 @@ def test_check_and_mutate_row(request_type, transport: str = "grpc"): def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.check_and_mutate_row(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "check_and_mutate_row") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -2194,26 +2199,27 @@ def test_ping_and_warm(request_type, transport: str = "grpc"): def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.ping_and_warm(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "ping_and_warm") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -2442,26 +2448,27 @@ def test_read_modify_write_row(request_type, transport: str = "grpc"): def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.read_modify_write_row(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "read_modify_write_row") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -2731,26 +2738,27 @@ def test_generate_initial_change_stream_partitions( def test_generate_initial_change_stream_partitions_pooled_rotation( transport: str = "pooled_grpc_asyncio", ): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: + client = BigtableClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. 
+ request = {} - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel response = client.generate_initial_change_stream_partitions(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "generate_initial_change_stream_partitions") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -3018,7 +3026,9 @@ def test_read_change_stream(request_type, transport: str = "grpc"): def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object(transports.pooled_grpc_asyncio.PooledChannel, "next_channel") as next_channel: + with mock.patch.object( + transports.pooled_grpc_asyncio.PooledChannel, "next_channel" + ) as next_channel: client = BigtableClient( credentials=ga_credentials.AnonymousCredentials(), transport=transport, @@ -3028,15 +3038,15 @@ def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asynci # and we are mocking out the actual API, so just send an empty request. request = {} - channel = client.transport._grpc_channel._pool[client.transport._grpc_channel._next_idx] + channel = client.transport._grpc_channel._pool[ + client.transport._grpc_channel._next_idx + ] next_channel.return_value = channel + response = client.read_change_stream(request) # Establish that next_channel was called next_channel.assert_called_once() - # Establish that stubs has been populated for the channel - stub_key = (channel, "read_change_stream") - assert client.transport._stubs[stub_key] is not None # Establish that subsequent calls all call next_channel starting_idx = client.transport._grpc_channel._next_idx for i in range(2, 10): @@ -3381,7 +3391,7 @@ def test_read_rows_rest_required_fields(request_type=bigtable.ReadRowsRequest): iter_content.return_value = iter(json_return_value) response = client.read_rows(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3667,7 +3677,7 @@ def test_sample_row_keys_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.sample_row_keys(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3939,7 +3949,7 @@ def test_mutate_row_rest_required_fields(request_type=bigtable.MutateRowRequest) response = client.mutate_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4234,7 +4244,7 @@ def test_mutate_rows_rest_required_fields(request_type=bigtable.MutateRowsReques iter_content.return_value = iter(json_return_value) response = client.mutate_rows(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4521,7 +4531,7 @@ def test_check_and_mutate_row_rest_required_fields( 
response = client.check_and_mutate_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4839,7 +4849,7 @@ def test_ping_and_warm_rest_required_fields(request_type=bigtable.PingAndWarmReq response = client.ping_and_warm(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5105,7 +5115,7 @@ def test_read_modify_write_row_rest_required_fields( response = client.read_modify_write_row(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5404,7 +5414,7 @@ def test_generate_initial_change_stream_partitions_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.generate_initial_change_stream_partitions(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5703,7 +5713,7 @@ def test_read_change_stream_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.read_change_stream(request) - expected_params = [("$alt", "json;enum-encoding=int")] + expected_params = [] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -6253,6 +6263,7 @@ def test_bigtable_pooled_grpc_transport_client_cert_source_for_mtls(transport_cl pool_size=pool_num, ) mock_create_channel.assert_called_with( + pool_num, "squid.clam.whelk:443", credentials=cred, credentials_file=None, @@ -6264,7 +6275,7 @@ def test_bigtable_pooled_grpc_transport_client_cert_source_for_mtls(transport_cl ("grpc.max_receive_message_length", -1), ], ) - assert mock_create_channel.call_count == pool_num + assert mock_create_channel.call_count == 1 # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls # is used. 
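The pooled-transport tests in this file all follow the same shape: patch next_channel, issue an RPC, and assert that every call advances the pool index. As a rough stand-alone sketch of the round-robin behavior being exercised (invented names such as RoundRobinPool and next_resource; the real transport exposes next_channel and replace_channel over grpc.aio channels):

import asyncio
from typing import Awaitable, Callable, Generic, List, TypeVar

T = TypeVar("T")


class RoundRobinPool(Generic[T]):
    # Simplified stand-in for a rotating resource pool; the real transport
    # pools grpc.aio channels and closes replaced ones with a grace period.
    def __init__(
        self,
        factory: Callable[[], T],
        closer: Callable[[T], Awaitable[None]],
        pool_size: int = 3,
    ):
        if pool_size <= 0:
            raise ValueError(f"invalid pool_size: {pool_size}")
        self._factory = factory
        self._closer = closer
        self._pool: List[T] = [factory() for _ in range(pool_size)]
        self._next_idx = 0

    def next_resource(self) -> T:
        # each call hands out the next pool member, wrapping around
        resource = self._pool[self._next_idx]
        self._next_idx = (self._next_idx + 1) % len(self._pool)
        return resource

    async def replace(self, idx: int, close_delay: float = 0.0) -> T:
        # swap a fresh resource into the slot first, then optionally wait
        # before closing the old one so in-flight users can finish
        new_resource = self._factory()
        old_resource, self._pool[idx] = self._pool[idx], new_resource
        await asyncio.sleep(close_delay)
        await self._closer(old_resource)
        return new_resource


async def _demo() -> None:
    counter = iter(range(100))

    def make_conn() -> str:
        return f"conn-{next(counter)}"

    async def close_conn(conn: str) -> None:
        print("closed", conn)

    pool = RoundRobinPool(make_conn, close_conn, pool_size=3)
    print([pool.next_resource() for _ in range(4)])  # conn-0, conn-1, conn-2, conn-0
    await pool.replace(1, close_delay=0)             # prints "closed conn-1"


if __name__ == "__main__":
    asyncio.run(_demo())

Rotating at call time, rather than binding a stub to a fixed channel, is what lets these tests assert that next_channel is consulted on every RPC.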
@@ -6700,7 +6711,9 @@ async def test_pooled_transport_close_async(): transport="pooled_grpc_asyncio", ) num_channels = len(client.transport._grpc_channel._pool) - with mock.patch.object(type(client.transport._grpc_channel._pool[0]), "close") as close: + with mock.patch.object( + type(client.transport._grpc_channel._pool[0]), "close" + ) as close: async with client: close.assert_not_called() close.assert_called() @@ -6776,7 +6789,7 @@ def test_api_key_credentials(client_class, transport_class): @pytest.mark.asyncio async def test_pooled_transport_replace_default(): - client = BigtableAsyncClient( + client = BigtableClient( credentials=ga_credentials.AnonymousCredentials(), transport="pooled_grpc_asyncio", ) @@ -6791,7 +6804,9 @@ async def test_pooled_transport_replace_default(): close.assert_called_once() close.assert_awaited() close.assert_called_with(grace=grace_period) - assert isinstance(client.transport._grpc_channel._pool[replace_idx], grpc.aio.Channel) + assert isinstance( + client.transport._grpc_channel._pool[replace_idx], grpc.aio.Channel + ) # only the specified channel should be replaced for i in range(num_channels): if i == replace_idx: @@ -6806,7 +6821,7 @@ async def test_pooled_transport_replace_default(): @pytest.mark.asyncio async def test_pooled_transport_replace_explicit(): - client = BigtableAsyncClient( + client = BigtableClient( credentials=ga_credentials.AnonymousCredentials(), transport="pooled_grpc_asyncio", ) @@ -6878,7 +6893,9 @@ def test_pooled_transport_pool_creation(): scopes = ["test1", "test2"] quota_project_id = "test3" host = "testhost:8080" - with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: + with mock.patch( + "google.api_core.grpc_helpers_async.create_channel" + ) as create_channel: transport = transports.PooledBigtableGrpcAsyncIOTransport( credentials=creds, pool_size=num_channels, From 2b044ce66d7f6f8fd3d8a8a299697cdf2a7b16df Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 Apr 2023 18:26:46 -0700 Subject: [PATCH 179/349] removed metadata --- google/cloud/bigtable/client.py | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d4c162b2f..94be659f1 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -58,7 +58,6 @@ def __init__( client_options: dict[str, Any] | "google.api_core.client_options.ClientOptions" | None = None, - metadata: list[tuple[str, str]] | None = None, ): """ Create a client instance for the Bigtable Data API @@ -79,7 +78,6 @@ def __init__( client_options (Optional[Union[dict, google.api_core.client_options.ClientOptions]]): Client options used to set user options on the client. API Endpoint should be set through client_options. 
- metadata: a list of metadata headers to be attached to all calls with this client Raises: - RuntimeError if called outside of an async run loop context - ValueError if pool_size is less than 1 @@ -111,7 +109,6 @@ def __init__( client_options=client_options, client_info=client_info, ) - self.metadata = metadata or [] # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() # attempt to start background tasks @@ -279,7 +276,6 @@ def get_table( instance_id: str, table_id: str, app_profile_id: str | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> Table: """ Returns a table instance for making data API requests @@ -291,9 +287,8 @@ def get_table( table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles - metadata: a list of metadata headers to be attached to all calls with this client """ - return Table(self, instance_id, table_id, app_profile_id, metadata) + return Table(self, instance_id, table_id, app_profile_id) class Table: @@ -310,7 +305,6 @@ def __init__( instance_id: str, table_id: str, app_profile_id: str | None = None, - metadata: list[tuple[str, str]] | None = None, ): """ Initialize a Table instance @@ -324,7 +318,6 @@ def __init__( table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles - metadata: a list of metadata headers to be attached to all calls with this client Raises: - RuntimeError if called outside of an async run loop context """ @@ -332,7 +325,6 @@ def __init__( self.instance = instance_id self.table_id = table_id self.app_profile_id = app_profile_id - self.metadata = metadata or [] # raises RuntimeError if called outside of an async run loop context try: self._register_instance_task = asyncio.create_task( @@ -356,7 +348,6 @@ async def read_rows_stream( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> AsyncIterable[RowResponse]: """ Returns a generator to asynchronously stream back row data. @@ -392,7 +383,6 @@ async def read_rows_stream( - per_request_timeout: the time budget for an individual network request, in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted - - metadata: Strings which should be sent along with the request as metadata headers. 
Returns: - an asynchronous generator that yields rows returned by the query @@ -413,7 +403,6 @@ async def read_rows( operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> list[RowResponse]: """ Helper function that returns a full list instead of a generator @@ -431,7 +420,6 @@ async def read_row( *, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> RowResponse: """ Helper function to return a single row @@ -453,7 +441,6 @@ async def read_rows_sharded( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> AsyncIterable[RowResponse]: """ Runs a sharded query in parallel @@ -472,7 +459,6 @@ async def row_exists( *, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> bool: """ Helper function to determine if a row exists @@ -490,7 +476,6 @@ async def sample_keys( operation_timeout: int | float | None = 60, per_sample_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ) -> RowKeySamples: """ Return a set of RowKeySamples that delimit contiguous sections of the table of @@ -531,7 +516,6 @@ async def mutate_row( *, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ): """ Mutates a row atomically. @@ -553,7 +537,6 @@ async def mutate_row( in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted if within operation_timeout budget - - metadata: Strings which should be sent along with the request as metadata headers. Raises: - DeadlineExceeded: raised after operation timeout @@ -570,7 +553,6 @@ async def bulk_mutate_rows( *, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, - metadata: list[tuple[str, str]] | None = None, ): """ Applies mutations for multiple rows in a single batched request. @@ -596,7 +578,6 @@ async def bulk_mutate_rows( in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted if within operation_timeout budget - - metadata: Strings which should be sent along with the request as metadata headers. Raises: - MutationsExceptionGroup if one or more mutations fails @@ -611,7 +592,6 @@ async def check_and_mutate_row( true_case_mutations: Mutation | list[Mutation] | None = None, false_case_mutations: Mutation | list[Mutation] | None = None, operation_timeout: int | float | None = 60, - metadata: list[tuple[str, str]] | None = None, ) -> bool: """ Mutates a row atomically based on the output of a predicate filter @@ -640,7 +620,6 @@ async def check_and_mutate_row( `true_case_mutations is empty, and at most 100000. - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will not be retried. - - metadata: Strings which should be sent along with the request as metadata headers. 
Returns: - bool indicating whether the predicate was true or false Raises: @@ -657,7 +636,6 @@ async def read_modify_write_row( | list[dict[str, Any]], *, operation_timeout: int | float | None = 60, - metadata: list[tuple[str, str]] | None = None, ) -> RowResponse: """ Reads and modifies a row atomically according to input ReadModifyWriteRules, @@ -675,7 +653,6 @@ async def read_modify_write_row( results of later ones. - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will not be retried. - - metadata: Strings which should be sent along with the request as metadata headers. Returns: - RowResponse: containing cell data that was modified as part of the operation From 1743098907e91c87d1956505e2f38779c29bed4a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 09:06:40 -0700 Subject: [PATCH 180/349] added sleep between swwapping and closing channels --- gapic-generator-fork | 2 +- google/cloud/bigtable/client.py | 4 ++-- .../bigtable/transports/pooled_grpc_asyncio.py | 13 ++++++++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 4781d8cf9..b4ed4d2ea 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 4781d8cf9fe5979522f5736e9a2a1d4525881a8e +Subproject commit b4ed4d2ea730f8ed23a8c571daa6affd19ea3684 diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 94be659f1..958feb7dd 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -188,7 +188,7 @@ async def _manage_channel( self, channel_idx: int, refresh_interval: float = 60 * 45, - grace_period: float = 60 * 15, + grace_period: float = 60 * 10, ) -> None: """ Background coroutine that periodically refreshes and warms a grpc channel @@ -228,7 +228,7 @@ async def _manage_channel( await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - await self.transport.replace_channel(channel_idx, grace_period, new_channel) + await self.transport.replace_channel(channel_idx, grace=grace_period, swap_sleep=10, new_channel=new_channel) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 83df179d9..c14d8a42b 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -142,7 +142,7 @@ async def wait_for_state_change(self, last_observed_state): raise NotImplementedError() async def replace_channel( - self, channel_idx, grace=None, new_channel=None + self, channel_idx, grace=None, swap_sleep=1, new_channel=None ) -> aio.Channel: """ Replaces a channel in the pool with a fresh one. @@ -155,6 +155,8 @@ async def replace_channel( grace(Optional[float]): The time to wait until all active RPCs are finished. If a grace period is not specified (by passing None for grace), all existing RPCs are cancelled immediately. + swap_sleep(Optional[float]): The number of seconds to sleep in between + replacing channels and closing the old one new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. 
""" @@ -166,6 +168,7 @@ async def replace_channel( new_channel = self._create_channel() old_channel = self._pool[channel_idx] self._pool[channel_idx] = new_channel + await asyncio.sleep(swap_sleep) await old_channel.close(grace=grace) return new_channel @@ -386,7 +389,7 @@ def grpc_channel(self) -> aio.Channel: return self._grpc_channel async def replace_channel( - self, channel_idx, grace=None, new_channel=None + self, channel_idx, grace=None, swap_sleep=1, new_channel=None ) -> aio.Channel: """ Replaces a channel in the pool with a fresh one. @@ -399,10 +402,14 @@ async def replace_channel( grace(Optional[float]): The time to wait until all active RPCs are finished. If a grace period is not specified (by passing None for grace), all existing RPCs are cancelled immediately. + swap_sleep(Optional[float]): The number of seconds to sleep in between + replacing channels and closing the old one new_channel(grpc.aio.Channel): a new channel to insert into the pool at `channel_idx`. If `None`, a new channel will be created. """ - return await self._grpc_channel.replace_channel(channel_idx, grace, new_channel) + return await self._grpc_channel.replace_channel( + channel_idx, grace, swap_sleep, new_channel + ) @property def read_rows( From e5fa4b6e6d3b29e46a1a95742c9ec519da23ea0d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 09:07:59 -0700 Subject: [PATCH 181/349] ran blacken --- google/cloud/bigtable/client.py | 4 +++- tests/unit/test_client.py | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 958feb7dd..cb4f51436 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -228,7 +228,9 @@ async def _manage_channel( await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() - await self.transport.replace_channel(channel_idx, grace=grace_period, swap_sleep=10, new_channel=new_channel) + await self.transport.replace_channel( + channel_idx, grace=grace_period, swap_sleep=10, new_channel=new_channel + ) # subtract the time spent waiting for the channel to be replaced next_sleep = refresh_interval - (time.time() - start_timestamp) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index de792350e..0f615111a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -155,7 +155,9 @@ async def test_veneer_grpc_headers(): @pytest.mark.asyncio async def test_channel_pool_creation(): pool_size = 14 - with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: + with mock.patch( + "google.api_core.grpc_helpers_async.create_channel" + ) as create_channel: create_channel.return_value = AsyncMock() client = _make_one(project="project-id", pool_size=pool_size) assert create_channel.call_count == pool_size @@ -174,11 +176,17 @@ async def test_channel_pool_rotation(): client = _make_one(project="project-id", pool_size=pool_size) assert len(client.transport._grpc_channel._pool) == pool_size - with mock.patch.object(type(client.transport._grpc_channel), "next_channel") as next_channel: - with mock.patch.object(type(client.transport._grpc_channel._pool[0]), "unary_unary"): + with mock.patch.object( + type(client.transport._grpc_channel), "next_channel" + ) as next_channel: + with mock.patch.object( + type(client.transport._grpc_channel._pool[0]), "unary_unary" + ): # calling an rpc `pool_size` times should use a 
different channel each time for i in range(pool_size): - channel_1 = client.transport._grpc_channel._pool[client.transport._next_idx] + channel_1 = client.transport._grpc_channel._pool[ + client.transport._next_idx + ] next_channel.return_value = channel_1 client.transport.ping_and_warm() assert next_channel.call_count == i + 1 @@ -336,7 +344,9 @@ async def test__manage_channel_ping_and_warm(): client = _make_one(project="project-id") new_channel = grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep"): - with mock.patch("google.api_core.grpc_helpers_async.create_channel") as create_channel: + with mock.patch( + "google.api_core.grpc_helpers_async.create_channel" + ) as create_channel: create_channel.return_value = new_channel with mock.patch.object( PooledBigtableGrpcAsyncIOTransport, "replace_channel" From 8955ec50140e3e113561a84d8893d3bb14174901 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 10:39:12 -0700 Subject: [PATCH 182/349] got tests working --- google/cloud/bigtable/client.py | 12 +-- tests/unit/test_client.py | 158 ++++++++++++++++---------------- 2 files changed, 79 insertions(+), 91 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index cb4f51436..e378a3a7c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -214,17 +214,7 @@ async def _manage_channel( while True: await asyncio.sleep(next_sleep) # prepare new channel for use - new_channel = self.transport.create_channel( - self.transport._host, - credentials=self.transport._credentials, - scopes=self.transport._scopes, - ssl_credentials=self.transport._ssl_channel_credentials, - quota_project_id=self.transport._quota_project_id, - options=[ - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ], - ) + new_channel = self.transport.grpc_channel._create_channel() await self._ping_and_warm_instances(new_channel) # cycle channel out of use, with long grace window before closure start_timestamp = time.time() diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0f615111a..7439f8383 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -48,18 +48,15 @@ def _make_one(*args, **kwargs): async def test_ctor(): expected_project = "project-id" expected_pool_size = 11 - expected_metadata = [("a", "b")] expected_credentials = AnonymousCredentials() client = _make_one( project="project-id", pool_size=expected_pool_size, - metadata=expected_metadata, credentials=expected_credentials, ) await asyncio.sleep(0.1) assert client.project == expected_project assert len(client.transport._grpc_channel._pool) == expected_pool_size - assert client.metadata == expected_metadata assert not client._active_instances assert len(client._channel_refresh_tasks) == expected_pool_size assert client.transport._credentials == expected_credentials @@ -79,19 +76,19 @@ async def test_ctor_super_inits(): credentials = AnonymousCredentials() client_options = {"api_endpoint": "foo.bar:1234"} options_parsed = client_options_lib.from_dict(client_options) - metadata = [("a", "b")] transport_str = f"pooled_grpc_asyncio_{pool_size}" with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: with mock.patch.object( _ClientProjectMixin, "__init__" ) as client_project_mixin_init: + client_project_mixin_init.__code__ = mock.Mock() + client_project_mixin_init.__code__.co_varnames = "credentials" try: _make_one( project=project, pool_size=pool_size, 
credentials=credentials, client_options=options_parsed, - metadata=metadata, ) except AttributeError: pass @@ -172,51 +169,57 @@ async def test_channel_pool_creation(): @pytest.mark.asyncio async def test_channel_pool_rotation(): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledChannel, + ) + pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client.transport._grpc_channel._pool) == pool_size - with mock.patch.object( - type(client.transport._grpc_channel), "next_channel" - ) as next_channel: + with mock.patch.object(PooledChannel, "next_channel") as next_channel: + client = _make_one(project="project-id", pool_size=pool_size) + assert len(client.transport._grpc_channel._pool) == pool_size + next_channel.reset_mock() with mock.patch.object( type(client.transport._grpc_channel._pool[0]), "unary_unary" - ): + ) as unary_unary: # calling an rpc `pool_size` times should use a different channel each time + channel_next = None for i in range(pool_size): - channel_1 = client.transport._grpc_channel._pool[ - client.transport._next_idx - ] - next_channel.return_value = channel_1 + channel_last = channel_next + channel_next = client.transport.grpc_channel._pool[i] + assert channel_last != channel_next + next_channel.return_value = channel_next client.transport.ping_and_warm() assert next_channel.call_count == i + 1 - channel_1.unary_unary.assert_called_once() + unary_unary.assert_called_once() + unary_unary.reset_mock() await client.close() @pytest.mark.asyncio async def test_channel_pool_replace(): - pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport._grpc_channel._pool] - grace_period = 9 - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel( - replace_idx, grace=grace_period, new_channel=new_channel - ) - close.assert_called_once_with(grace=grace_period) - close.assert_awaited_once() - assert client.transport._grpc_channel._pool[replace_idx] == new_channel - for i in range(pool_size): - if i != replace_idx: - assert client.transport._grpc_channel._pool[i] == start_pool[i] - else: - assert client.transport._grpc_channel._pool[i] != start_pool[i] - await client.close() + with mock.patch.object(asyncio, "sleep") as sleep: + pool_size = 7 + client = _make_one(project="project-id", pool_size=pool_size) + for replace_idx in range(pool_size): + start_pool = [channel for channel in client.transport._grpc_channel._pool] + grace_period = 9 + with mock.patch.object( + type(client.transport._grpc_channel._pool[0]), "close" + ) as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) + close.assert_called_once_with(grace=grace_period) + close.assert_awaited_once() + assert client.transport._grpc_channel._pool[replace_idx] == new_channel + for i in range(pool_size): + if i != replace_idx: + assert client.transport._grpc_channel._pool[i] == start_pool[i] + else: + assert client.transport._grpc_channel._pool[i] != start_pool[i] + await client.close() @pytest.mark.filterwarnings("ignore::RuntimeWarning") @@ -344,38 +347,37 @@ async def test__manage_channel_ping_and_warm(): client = _make_one(project="project-id") new_channel = 
grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep"): - with mock.patch( - "google.api_core.grpc_helpers_async.create_channel" - ) as create_channel: - create_channel.return_value = new_channel + create_channel = mock.Mock() + create_channel.return_value = new_channel + client.transport.grpc_channel._create_channel = create_channel + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + replace_channel.side_effect = asyncio.CancelledError + # should ping and warm old channel then new if sleep > 0 with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - replace_channel.side_effect = asyncio.CancelledError - # should ping and warm old channel then new if sleep > 0 - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - channel_idx = 2 - old_channel = client.transport._grpc_channel._pool[channel_idx] - await client._manage_channel(channel_idx, 10) - except asyncio.CancelledError: - pass - assert ping_and_warm.call_count == 2 - assert old_channel != new_channel - called_with = [call[0][0] for call in ping_and_warm.call_args_list] - assert old_channel in called_with - assert new_channel in called_with - # should ping and warm instantly new channel only if not sleeping - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - await client._manage_channel(0, 0) - except asyncio.CancelledError: - pass - ping_and_warm.assert_called_once_with(new_channel) + type(_make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + channel_idx = 2 + old_channel = client.transport._grpc_channel._pool[channel_idx] + await client._manage_channel(channel_idx, 10) + except asyncio.CancelledError: + pass + assert ping_and_warm.call_count == 2 + assert old_channel != new_channel + called_with = [call[0][0] for call in ping_and_warm.call_args_list] + assert old_channel in called_with + assert new_channel in called_with + # should ping and warm instantly new channel only if not sleeping + with mock.patch.object( + type(_make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + await client._manage_channel(0, 0) + except asyncio.CancelledError: + pass + ping_and_warm.assert_called_once_with(new_channel) await client.close() @@ -422,6 +424,7 @@ async def test__manage_channel_refresh(num_cycles): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) + from google.api_core import grpc_helpers_async expected_grace = 9 expected_refresh = 0.5 @@ -435,11 +438,12 @@ async def test__manage_channel_refresh(num_cycles): sleep.side_effect = [None for i in range(num_cycles)] + [ asyncio.CancelledError ] - client = _make_one(project="project-id") with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "create_channel" + grpc_helpers_async, "create_channel" ) as create_channel: create_channel.return_value = new_channel + client = _make_one(project="project-id") + create_channel.reset_mock() try: await client._manage_channel( channel_idx, @@ -452,9 +456,10 @@ async def test__manage_channel_refresh(num_cycles): assert create_channel.call_count == num_cycles assert replace_channel.call_count == num_cycles for call in replace_channel.call_args_list: - assert call[0][0] == channel_idx - assert call[0][1] == expected_grace - assert call[0][2] == new_channel + args, kwargs = call + assert args[0] == channel_idx + 
assert kwargs["grace"] == expected_grace + assert kwargs["new_channel"] == new_channel await client.close() @@ -537,19 +542,16 @@ async def test_get_table(): expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" - expected_metadata = [("a", "b")] table = client.get_table( expected_instance_id, expected_table_id, expected_app_profile_id, - expected_metadata, ) await asyncio.sleep(0) assert isinstance(table, Table) assert table.table_id == expected_table_id assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id - assert table.metadata == expected_metadata assert table.client is client full_instance_name = client.instance_path(client.project, expected_instance_id) assert full_instance_name in client._active_instances @@ -656,7 +658,6 @@ async def test_table_ctor(): expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" - expected_metadata = [("a", "b")] client = BigtableDataClient() assert not client._active_instances @@ -665,13 +666,11 @@ async def test_table_ctor(): expected_instance_id, expected_table_id, expected_app_profile_id, - expected_metadata, ) await asyncio.sleep(0) assert table.table_id == expected_table_id assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id - assert table.metadata == expected_metadata assert table.client is client full_instance_name = client.instance_path(client.project, expected_instance_id) assert full_instance_name in client._active_instances @@ -694,5 +693,4 @@ def test_table_ctor_sync(): assert table.table_id == "table-id" assert table.instance == "instance-id" assert table.app_profile_id is None - assert table.metadata == [] assert table.client is client From 002bc5f298c5fbad665791fdbe6f446d3e445f5d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 10:44:11 -0700 Subject: [PATCH 183/349] fixed lint issue --- tests/unit/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 7439f8383..ecbcb63d1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -198,7 +198,7 @@ async def test_channel_pool_rotation(): @pytest.mark.asyncio async def test_channel_pool_replace(): - with mock.patch.object(asyncio, "sleep") as sleep: + with mock.patch.object(asyncio, "sleep"): pool_size = 7 client = _make_one(project="project-id", pool_size=pool_size) for replace_idx in range(pool_size): From 65f0d2fc7af67303dca61b95eace490b5e0a5b7f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 10:51:53 -0700 Subject: [PATCH 184/349] fixed tests --- tests/unit/gapic/bigtable_v2/test_bigtable.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index 9a10ba94a..b1500aa48 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -3391,7 +3391,7 @@ def test_read_rows_rest_required_fields(request_type=bigtable.ReadRowsRequest): iter_content.return_value = iter(json_return_value) response = client.read_rows(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3677,7 +3677,7 @@ def test_sample_row_keys_rest_required_fields( 
iter_content.return_value = iter(json_return_value) response = client.sample_row_keys(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -3949,7 +3949,7 @@ def test_mutate_row_rest_required_fields(request_type=bigtable.MutateRowRequest) response = client.mutate_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4244,7 +4244,7 @@ def test_mutate_rows_rest_required_fields(request_type=bigtable.MutateRowsReques iter_content.return_value = iter(json_return_value) response = client.mutate_rows(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4531,7 +4531,7 @@ def test_check_and_mutate_row_rest_required_fields( response = client.check_and_mutate_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -4849,7 +4849,7 @@ def test_ping_and_warm_rest_required_fields(request_type=bigtable.PingAndWarmReq response = client.ping_and_warm(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5115,7 +5115,7 @@ def test_read_modify_write_row_rest_required_fields( response = client.read_modify_write_row(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5414,7 +5414,7 @@ def test_generate_initial_change_stream_partitions_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.generate_initial_change_stream_partitions(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params @@ -5713,7 +5713,7 @@ def test_read_change_stream_rest_required_fields( iter_content.return_value = iter(json_return_value) response = client.read_change_stream(request) - expected_params = [] + expected_params = [("$alt", "json;enum-encoding=int")] actual_params = req.call_args.kwargs["params"] assert expected_params == actual_params From 5f41c06be8d389ad289ce097d208bad67eeaf10d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 11:51:58 -0700 Subject: [PATCH 185/349] changed return type --- google/cloud/bigtable/client.py | 18 +++++++++--------- google/cloud/bigtable/row_merger.py | 4 ++-- tests/unit/test_row.py | 3 --- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 22d6b7eb3..9be70673c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import cast, Any, Optional, AsyncIterable, Set, TYPE_CHECKING +from typing import cast, Any, Optional, AsyncIterable, AsyncIterator, Set, TYPE_CHECKING import asyncio import grpc @@ -42,6 +42,7 @@ if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry + from google.cloud.bigtable_v2.types import ReadRowsResponse from google.cloud.bigtable.mutations_batcher 
import MutationsBatcher from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery @@ -342,7 +343,7 @@ async def read_rows_stream( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - ) -> AsyncIterator[Row]: + ) -> ReadRowsGenerator: """ Returns a generator to asynchronously stream back row data. @@ -386,9 +387,9 @@ async def read_rows_stream( from any retries that failed - IdleTimeout: if generator was abandoned """ - request = query.to_dict() if isinstance(query, ReadRowsQuery) else query - request["table_name"] = self._gapic_client.table_name(self.table_id) - gapic_stream_handler = await self._gapic_client.read_rows( + request = query._to_dict() if isinstance(query, ReadRowsQuery) else query + request["table_name"] = self._client.table_name(self.table_id) + gapic_stream_handler = await self._client.read_rows( request=request, app_profile_id=self.app_profile_id, timeout=operation_timeout, @@ -442,7 +443,7 @@ async def read_rows_sharded( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - ) -> AsyncIterable[Row]: + ) -> ReadRowsGenerator: """ Runs a sharded query in parallel @@ -663,12 +664,12 @@ async def read_modify_write_row( raise NotImplementedError -class ReadRowsGenerator(): +class ReadRowsGenerator(AsyncIterator[Row]): """ User-facing async generator for streaming read_rows responses """ - def __init__(self, gapic_stream:AsyncIterable["ReadRowsResponse"]): + def __init__(self, gapic_stream: AsyncIterable["ReadRowsResponse"]): merger = RowMerger() self._inner_gen = merger.merge_row_stream(gapic_stream) self.request_stats = None @@ -685,4 +686,3 @@ async def __anext__(self) -> Row: self.request_stats = next_item next_item = await self._inner_gen.__anext__() return next_item - diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b1c98480e..1c9c1d007 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -52,7 +52,7 @@ def __init__(self): async def merge_row_stream( self, request_generator: AsyncIterable[ReadRowsResponse] - ) -> AsyncGenerator[Row|RequestStats, None]: + ) -> AsyncGenerator[Row | RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows @@ -96,7 +96,7 @@ async def merge_row_stream_with_cache( self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, - ) -> AsyncGenerator[Row|RequestStats, None]: + ) -> AsyncGenerator[Row | RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows, with a local cache to decouple the producer from the consumer diff --git a/tests/unit/test_row.py b/tests/unit/test_row.py index 92f58eb36..7f3d27cb9 100644 --- a/tests/unit/test_row.py +++ b/tests/unit/test_row.py @@ -90,9 +90,6 @@ def test_get_cells(self): row_response.get_cells(family="1", qualifier=b"c") def test___repr__(self): - from google.cloud.bigtable.row import Cell - from google.cloud.bigtable.row import Row - cell_str = ( "{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}" % (TEST_TIMESTAMP) From 7b68207ee88d3e49a20678d6989494d7c59408e5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 12:05:18 -0700 Subject: [PATCH 186/349] fixed typing issues --- google/cloud/bigtable/client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 9be70673c..1f9c9ebe4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import cast, Any, Optional, AsyncIterable, AsyncIterator, Set, TYPE_CHECKING +from typing import cast, Any, Optional, AsyncIterable, Set, TYPE_CHECKING import asyncio import grpc @@ -388,8 +388,8 @@ async def read_rows_stream( - IdleTimeout: if generator was abandoned """ request = query._to_dict() if isinstance(query, ReadRowsQuery) else query - request["table_name"] = self._client.table_name(self.table_id) - gapic_stream_handler = await self._client.read_rows( + request["table_name"] = self.client.table_path(self.table_id) + gapic_stream_handler = await self.client.read_rows( request=request, app_profile_id=self.app_profile_id, timeout=operation_timeout, @@ -664,7 +664,7 @@ async def read_modify_write_row( raise NotImplementedError -class ReadRowsGenerator(AsyncIterator[Row]): +class ReadRowsGenerator(AsyncIterable[Row]): """ User-facing async generator for streaming read_rows responses """ @@ -672,10 +672,10 @@ class ReadRowsGenerator(AsyncIterator[Row]): def __init__(self, gapic_stream: AsyncIterable["ReadRowsResponse"]): merger = RowMerger() self._inner_gen = merger.merge_row_stream(gapic_stream) - self.request_stats = None + self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() - async def __aiter__(self) -> AsyncIterator[Row]: + async def __aiter__(self): return self async def __anext__(self) -> Row: From c164a47e34bda5fe4b972dd029d29a3cf3bbb515 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 12:37:08 -0700 Subject: [PATCH 187/349] adjusted types --- google/cloud/bigtable/client.py | 72 ++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 046f6ec9e..f9df573cc 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,7 +15,15 @@ from __future__ import annotations -from typing import cast, Any, Optional, AsyncIterable, Set, TYPE_CHECKING +from typing import ( + cast, + Any, + Optional, + AsyncIterable, + AsyncGenerator, + Set, + TYPE_CHECKING, +) import asyncio import grpc @@ -43,7 +51,6 @@ if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry - from google.cloud.bigtable_v2.types import ReadRowsResponse from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.row import Row from google.cloud.bigtable.row import _LastScannedRow @@ -383,11 +390,12 @@ async def read_rows_stream( from any retries that failed - IdleTimeout: if generator was abandoned """ - request = query.to_dict() if isinstance(query, ReadRowsQuery) else query + request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.client.table_path(self.table_id) def on_error(exc): return exc + retry = retries.AsyncRetry( predicate=retries.if_exception_type( InvalidChunk, @@ -399,48 +407,57 @@ def on_error(exc): initial=0.1, multiplier=2, maximum=1, - is_generator=True + is_generator=True, ) retryable_fn = retry(self._read_rows_retryable) - emitted_rows:set[bytes] = set({}) - async for result in retryable_fn(request, emitted_rows, per_request_timeout, per_request_timeout): - if isinstance(result, Row): - yield result - + emitted_rows: set[bytes] = set({}) + return 
ReadRowsGenerator( + retryable_fn( + request, emitted_rows, per_request_timeout, per_request_timeout + ) + ) async def _read_rows_retryable( - self, request:dict[str, Any], emitted_rows: set[bytes], per_request_timeout=None, per_row_timeout=None, revise_on_retry=True, cache_size_limit=None, - ) -> AsyncIterable[Row, None]: + self, + request: dict[str, Any], + emitted_rows: set[bytes], + per_request_timeout=None, + per_row_timeout=None, + revise_on_retry=True, + cache_size_limit=None, + ) -> AsyncGenerator[Row | RequestStats, None]: if revise_on_retry and len(emitted_rows) > 0: # if this is a retry, try to trim down the request to avoid ones we've already processed request["rows"] = self._revise_rowset( request.get("rows", None), emitted_rows ) - gapic_stream_handler = await self._gapic_client.read_rows( + gapic_stream_handler = await self.client.read_rows( request=request, app_profile_id=self.app_profile_id, timeout=per_request_timeout, ) merger = RowMerger() - generator = merger.merge_row_stream_with_cache(gapic_stream_handler, cache_size_limit) + generator = merger.merge_row_stream_with_cache( + gapic_stream_handler, cache_size_limit + ) while True: try: - row = await asyncio.wait_for(generator.__anext__(), timeout=per_row_timeout) + row = await asyncio.wait_for( + generator.__anext__(), timeout=per_row_timeout + ) if row.row_key not in emitted_rows: if not isinstance(row, _LastScannedRow): # last scanned rows are not emitted yield row emitted_rows.add(row.row_key) except asyncio.TimeoutError: - generator.close() + await generator.aclose() raise core_exceptions.DeadlineExceeded("per_row_timeout exceeded") except StopAsyncIteration: break - - def _revise_rowset( - self, row_set: dict[str, Any]|None, emitted_rows: set[bytes] + self, row_set: dict[str, Any] | None, emitted_rows: set[bytes] ) -> dict[str, Any]: # if user is doing a whole table scan, start a new one with the last seen key if row_set is None: @@ -451,7 +468,7 @@ def _revise_rowset( } else: # remove seen keys from user-specific key list - row_keys: List[bytes] = row_set.get("row_keys", []) + row_keys: list[bytes] = row_set.get("row_keys", []) adjusted_keys = [] for key in row_keys: if key not in emitted_rows: @@ -737,9 +754,8 @@ class ReadRowsGenerator(AsyncIterable[Row]): User-facing async generator for streaming read_rows responses """ - def __init__(self, gapic_stream: AsyncIterable["ReadRowsResponse"]): - merger = RowMerger() - self._inner_gen = merger.merge_row_stream(gapic_stream) + def __init__(self, stream: AsyncGenerator[Row | RequestStats, None]): + self.stream = stream self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() @@ -748,9 +764,9 @@ async def __aiter__(self): async def __anext__(self) -> Row: self.last_interaction_time = time.time() - next_item = await self._inner_gen.__anext__() - while not isinstance(next_item, Row): - if isinstance(next_item, RequestStats): - self.request_stats = next_item - next_item = await self._inner_gen.__anext__() - return next_item + next_item = await self.stream.__anext__() + if isinstance(next_item, RequestStats): + self.request_stats = next_item + return await self.__anext__() + else: + return next_item From 96d58d1431f39f8da98e8046b664174630129c31 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 13:40:08 -0700 Subject: [PATCH 188/349] added per-row-rimeout to merge_row_stream_with_cache --- google/cloud/bigtable/client.py | 19 ++++++++----------- google/cloud/bigtable/row_merger.py | 10 ++++++---- 2 files changed, 14 
insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index f9df573cc..16dd8c497 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -438,23 +438,20 @@ async def _read_rows_retryable( ) merger = RowMerger() generator = merger.merge_row_stream_with_cache( - gapic_stream_handler, cache_size_limit + gapic_stream_handler, cache_size_limit, per_row_timeout ) - while True: - try: - row = await asyncio.wait_for( - generator.__anext__(), timeout=per_row_timeout - ) + try: + async for row in generator: if row.row_key not in emitted_rows: if not isinstance(row, _LastScannedRow): # last scanned rows are not emitted yield row emitted_rows.add(row.row_key) - except asyncio.TimeoutError: - await generator.aclose() - raise core_exceptions.DeadlineExceeded("per_row_timeout exceeded") - except StopAsyncIteration: - break + except asyncio.TimeoutError: + await generator.aclose() + raise core_exceptions.DeadlineExceeded("per_row_timeout exceeded") + except StopAsyncIteration as e: + raise e def _revise_rowset( self, row_set: dict[str, Any] | None, emitted_rows: set[bytes] diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 1c9c1d007..216de50de 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -96,6 +96,7 @@ async def merge_row_stream_with_cache( self, request_generator: AsyncIterable[ReadRowsResponse], max_cache_size: int | None = None, + per_row_timeout: float | None = None, ) -> AsyncGenerator[Row | RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows, @@ -106,6 +107,8 @@ async def merge_row_stream_with_cache( this is a stream of chunks from the Bigtable API - max_cache_size: maximum number of items to cache. If None, cache size is unbounded + - per_row_timeout: maximum time to wait for a complete row. If None, + timeout is unbounded Returns: - AsyncGenerator of Rows Raises: @@ -124,12 +127,11 @@ async def merge_row_stream_with_cache( yield await cache.get() else: # wait for either the stream to finish, or a new item to enter the cache - get_from_cache = asyncio.create_task(cache.get()) - await asyncio.wait( + get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) + first_finish = asyncio.wait( [stream_task, get_from_cache], return_when=asyncio.FIRST_COMPLETED ) - if get_from_cache.done(): - yield get_from_cache.result() + await asyncio.wait_for(first_finish, per_row_timeout) # stream and cache are complete. 
if there's an exception, raise it if stream_task.exception(): raise cast(Exception, stream_task.exception()) From 216610eb08a77b7cd98a8a657878534091a57e8a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 13:46:05 -0700 Subject: [PATCH 189/349] cancel stream on exception --- google/cloud/bigtable/row_merger.py | 32 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 216de50de..b9035667c 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -121,20 +121,24 @@ async def merge_row_stream_with_cache( stream_task = asyncio.create_task( self._generator_to_cache(cache, self.merge_row_stream(request_generator)) ) - # read from state machine and push into cache - while not stream_task.done() or not cache.empty(): - if not cache.empty(): - yield await cache.get() - else: - # wait for either the stream to finish, or a new item to enter the cache - get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) - first_finish = asyncio.wait( - [stream_task, get_from_cache], return_when=asyncio.FIRST_COMPLETED - ) - await asyncio.wait_for(first_finish, per_row_timeout) - # stream and cache are complete. if there's an exception, raise it - if stream_task.exception(): - raise cast(Exception, stream_task.exception()) + try: + # read from state machine and push into cache + while not stream_task.done() or not cache.empty(): + if not cache.empty(): + yield await cache.get() + else: + # wait for either the stream to finish, or a new item to enter the cache + get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) + first_finish = asyncio.wait( + [stream_task, get_from_cache], + return_when=asyncio.FIRST_COMPLETED, + ) + await asyncio.wait_for(first_finish, per_row_timeout) + # stream and cache are complete. 
if there's an exception, raise it + if stream_task.exception(): + raise cast(Exception, stream_task.exception()) + finally: + stream_task.cancel() class StateMachine: From c505c39196bacfd6b7b7c69d19aad68358a896fa Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 14:56:57 -0700 Subject: [PATCH 190/349] moved retry logic into RetryableRowMerger --- google/cloud/bigtable/client.py | 85 +---------------------------- google/cloud/bigtable/row_merger.py | 84 +++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 83 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 16dd8c497..5df684a73 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -40,6 +40,7 @@ from google.cloud.client import _ClientProjectMixin from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigtable.row_merger import RowMerger +from google.cloud.bigtable.row_merger import RetryableRowMerger from google.cloud.bigtable.row_merger import InvalidChunk from google.cloud.bigtable_v2.types import RequestStats @@ -392,92 +393,12 @@ async def read_rows_stream( """ request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.client.table_path(self.table_id) + request["app_profile_id"] = self.app_profile_id - def on_error(exc): - return exc - - retry = retries.AsyncRetry( - predicate=retries.if_exception_type( - InvalidChunk, - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - ), - timeout=operation_timeout, - on_error=on_error, - initial=0.1, - multiplier=2, - maximum=1, - is_generator=True, - ) - retryable_fn = retry(self._read_rows_retryable) - emitted_rows: set[bytes] = set({}) return ReadRowsGenerator( - retryable_fn( - request, emitted_rows, per_request_timeout, per_request_timeout - ) + RetryableRowMerger(request, self.client.read_rows, cache_size=cache_size, operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, idle_timeout=idle_timeout, per_request_timeout=per_request_timeout) ) - async def _read_rows_retryable( - self, - request: dict[str, Any], - emitted_rows: set[bytes], - per_request_timeout=None, - per_row_timeout=None, - revise_on_retry=True, - cache_size_limit=None, - ) -> AsyncGenerator[Row | RequestStats, None]: - if revise_on_retry and len(emitted_rows) > 0: - # if this is a retry, try to trim down the request to avoid ones we've already processed - request["rows"] = self._revise_rowset( - request.get("rows", None), emitted_rows - ) - gapic_stream_handler = await self.client.read_rows( - request=request, - app_profile_id=self.app_profile_id, - timeout=per_request_timeout, - ) - merger = RowMerger() - generator = merger.merge_row_stream_with_cache( - gapic_stream_handler, cache_size_limit, per_row_timeout - ) - try: - async for row in generator: - if row.row_key not in emitted_rows: - if not isinstance(row, _LastScannedRow): - # last scanned rows are not emitted - yield row - emitted_rows.add(row.row_key) - except asyncio.TimeoutError: - await generator.aclose() - raise core_exceptions.DeadlineExceeded("per_row_timeout exceeded") - except StopAsyncIteration as e: - raise e - - def _revise_rowset( - self, row_set: dict[str, Any] | None, emitted_rows: set[bytes] - ) -> dict[str, Any]: - # if user is doing a whole table scan, start a new one with the last seen key - if row_set is None: - last_seen = max(emitted_rows) - return { - "row_keys": [], - "row_ranges": [{"start_key_open": last_seen}], - } - else: - # remove 
seen keys from user-specific key list - row_keys: list[bytes] = row_set.get("row_keys", []) - adjusted_keys = [] - for key in row_keys: - if key not in emitted_rows: - adjusted_keys.append(key) - # if user specified only a single range, set start to the last seen key - row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) - if len(row_keys) == 0 and len(row_ranges) == 1: - row_ranges[0]["start_key_open"] = max(emitted_rows) - if "start_key_closed" in row_ranges[0]: - row_ranges[0].pop("start_key_closed") - return {"row_keys": adjusted_keys, "row_ranges": row_ranges} - async def read_rows( self, query: ReadRowsQuery | dict[str, Any], diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index b9035667c..cf04b4d3f 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -34,6 +34,88 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" +class RetryableRowMerger(): + + def __init__( + self, + request: dict[str, Any], + gapic_fn, + *, + cache_size: int|None = None, + operation_timeout: float|None = None, + per_row_timeout: float|None = None, + revise_on_retry: bool = True, + ): + self.revise_on_retry = revise_on_retry + self.last_seen_row_key : bytes | None = None + self.emitted_rows : Set[bytes] = set() + self.request = request + + # lock in paramters for retryable wrapper + partial_retryable = functools.partial( + self.retryable_wrapper, + cache_size, + per_row_timeout, + per_request_timeout, + gapic_fn + ) + + retry = retries.AsyncRetry( + predicate=retries.if_exception_type( + InvalidChunk, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ), + timeout=operation_timeout, + initial=0.1, + multiplier=2, + maximum=1, + is_generator=True, + ) + self.retryable_stream = retry(partial_retryable) + + async def __aiter__(self): + return self.retryable_stream.__aiter__() + + async def retryable_wrapper(cache_size. 
per_row_timeout, per_request_timeout, gapic_fn): + if self.revise_on_retry and self.last_seen_row_key is not None: + # if this is a retry, try to trim down the request to avoid ones we've already processed + self.request["rows"] = self._revise_rowset( + self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows + ) + new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) + self.last_merger = RowMerger() + async for row in merger.merge_row_stream_with_cache(new_gapic_stream, cache_size, per_row_timeout): + # ignore duplicates after retry + if row not in self.emitted_rows: + self.emitted_rows.add(row) + self.last_seen_row_key = row.row_key + yield row + + def _revise_request_rowset( + self, row_set: dict[str, Any] | None, last_seen_row_key: bytes, emitted_rows: Set[bytes] + ) -> dict[str, Any]: + # if user is doing a whole table scan, start a new one with the last seen key + if row_set is None: + last_seen = last_seen_row_key + return { + "row_keys": [], + "row_ranges": [{"start_key_open": last_seen}], + } + else: + # remove seen keys from user-specific key list + row_keys: list[bytes] = row_set.get("row_keys", []) + adjusted_keys = [] + for key in row_keys: + if key not in emitted_rows: + adjusted_keys.append(key) + # if user specified only a single range, set start to the last seen key + row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) + if len(row_keys) == 0 and len(row_ranges) == 1: + row_ranges[0]["start_key_open"] = last_seen_row_key + if "start_key_closed" in row_ranges[0]: + row_ranges[0].pop("start_key_closed") + return {"row_keys": adjusted_keys, "row_ranges": row_ranges} class RowMerger: """ @@ -116,7 +198,7 @@ async def merge_row_stream_with_cache( """ if max_cache_size is None: max_cache_size = -1 - cache: asyncio.Queue[Row] = asyncio.Queue(max_cache_size) + cache: asyncio.Queue[Row|RequestStats] = asyncio.Queue(max_cache_size) stream_task = asyncio.create_task( self._generator_to_cache(cache, self.merge_row_stream(request_generator)) From 179c8b8ec1a5a33174d871c035c9cfa42e1b9373 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 15:02:41 -0700 Subject: [PATCH 191/349] fixed issues in merger --- google/cloud/bigtable/row_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index cf04b4d3f..f00f3ee18 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -77,14 +77,14 @@ def __init__( async def __aiter__(self): return self.retryable_stream.__aiter__() - async def retryable_wrapper(cache_size. 
per_row_timeout, per_request_timeout, gapic_fn): + async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeout, gapic_fn): if self.revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed self.request["rows"] = self._revise_rowset( self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) - self.last_merger = RowMerger() + merger = RowMerger() async for row in merger.merge_row_stream_with_cache(new_gapic_stream, cache_size, per_row_timeout): # ignore duplicates after retry if row not in self.emitted_rows: From 3cc5380f19fa39b639c6566c7798d97511a76fef Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 15:11:39 -0700 Subject: [PATCH 192/349] moved streaming into cache into RetryableRowMerger --- google/cloud/bigtable/row_merger.py | 102 +++++++++++----------------- 1 file changed, 38 insertions(+), 64 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index f00f3ee18..2281a0ed8 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -77,6 +77,15 @@ def __init__( async def __aiter__(self): return self.retryable_stream.__aiter__() + async def _generator_to_cache( + self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] + ) -> None: + """ + Helper function to push items from an async generator into a cache + """ + async for item in input_generator: + await cache.put(item) + async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeout, gapic_fn): if self.revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed @@ -84,13 +93,36 @@ async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeo self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) + cache: asyncio.Queue[Row|RequestStats] = asyncio.Queue(max_cache_size) merger = RowMerger() - async for row in merger.merge_row_stream_with_cache(new_gapic_stream, cache_size, per_row_timeout): - # ignore duplicates after retry - if row not in self.emitted_rows: - self.emitted_rows.add(row) - self.last_seen_row_key = row.row_key - yield row + stream_task = asyncio.create_task( + self._generator_to_cache(cache, merger.merge_row_stream(new_gapic_stream)) + ) + try: + # read from state machine and push into cache + while not stream_task.done() or not cache.empty(): + if not cache.empty(): + new_item = await cache.get() + # don't yield rows that have already been emitted + if isinstance(new_item, RequestStats): + yield new_item + elif isinstance(new_item, Row) and new_item.row_key not in self.emitted_rows: + self.last_seen_row_key = new_item.row_key + self.emitted_rows.add(new_item.row_key) + yield new_item + else: + # wait for either the stream to finish, or a new item to enter the cache + get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) + first_finish = asyncio.wait( + [stream_task, get_from_cache], + return_when=asyncio.FIRST_COMPLETED, + ) + await asyncio.wait_for(first_finish, per_row_timeout) + # stream and cache are complete. 
if there's an exception, raise it + if stream_task.exception(): + raise cast(Exception, stream_task.exception()) + finally: + stream_task.cancel() def _revise_request_rowset( self, row_set: dict[str, Any] | None, last_seen_row_key: bytes, emitted_rows: Set[bytes] @@ -165,64 +197,6 @@ async def merge_row_stream( # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") - async def _generator_to_cache( - self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] - ) -> None: - """ - Helper function to push items from an async generator into a cache - """ - async for item in input_generator: - await cache.put(item) - - async def merge_row_stream_with_cache( - self, - request_generator: AsyncIterable[ReadRowsResponse], - max_cache_size: int | None = None, - per_row_timeout: float | None = None, - ) -> AsyncGenerator[Row | RequestStats, None]: - """ - Consume chunks from a ReadRowsResponse stream into a set of Rows, - with a local cache to decouple the producer from the consumer - - Args: - - request_generator: AsyncIterable of ReadRowsResponse objects. Typically - this is a stream of chunks from the Bigtable API - - max_cache_size: maximum number of items to cache. If None, cache size - is unbounded - - per_row_timeout: maximum time to wait for a complete row. If None, - timeout is unbounded - Returns: - - AsyncGenerator of Rows - Raises: - - InvalidChunk: if the chunk stream is invalid - """ - if max_cache_size is None: - max_cache_size = -1 - cache: asyncio.Queue[Row|RequestStats] = asyncio.Queue(max_cache_size) - - stream_task = asyncio.create_task( - self._generator_to_cache(cache, self.merge_row_stream(request_generator)) - ) - try: - # read from state machine and push into cache - while not stream_task.done() or not cache.empty(): - if not cache.empty(): - yield await cache.get() - else: - # wait for either the stream to finish, or a new item to enter the cache - get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) - first_finish = asyncio.wait( - [stream_task, get_from_cache], - return_when=asyncio.FIRST_COMPLETED, - ) - await asyncio.wait_for(first_finish, per_row_timeout) - # stream and cache are complete. if there's an exception, raise it - if stream_task.exception(): - raise cast(Exception, stream_task.exception()) - finally: - stream_task.cancel() - - class StateMachine: """ State Machine converts chunks into Rows From 4af02181d37c3e0169cfcc62a778931a6e2aae7f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 15:18:02 -0700 Subject: [PATCH 193/349] restructuring --- google/cloud/bigtable/client.py | 14 +++++++++- google/cloud/bigtable/row_merger.py | 42 ++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 5df684a73..6f5f01f7d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -395,8 +395,20 @@ async def read_rows_stream( request["table_name"] = self.client.table_path(self.table_id) request["app_profile_id"] = self.app_profile_id + # read_rows smart retries is implemented using a series of generators: + # - client.read_rows: outputs raw ReadRowsResponse objects from backend. 
Has per_request_timeout + # - RowMerger.merge_row_stream: parses chunks into rows + # - RetryableRowMerger.retryable_wrapper: adds retries, caching, revised requests, per_row_timeout, per_row_timeout + # - ReadRowsGenerator: adds idle_timeout, moves stats out of stream and into attribute return ReadRowsGenerator( - RetryableRowMerger(request, self.client.read_rows, cache_size=cache_size, operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, idle_timeout=idle_timeout, per_request_timeout=per_request_timeout) + RetryableRowMerger( + request, + self.client.read_rows, + cache_size=cache_size, + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, + ) ) async def read_rows( diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 2281a0ed8..5e37ca86c 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -18,6 +18,9 @@ from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable.row import Row, Cell, _LastScannedRow import asyncio +from functools import partial +from google.api_core import retry_async as retries +from google.api_core import exceptions as core_exceptions from abc import ABC, abstractmethod @@ -34,30 +37,31 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" -class RetryableRowMerger(): +class RetryableRowMerger: def __init__( self, request: dict[str, Any], gapic_fn, *, - cache_size: int|None = None, - operation_timeout: float|None = None, - per_row_timeout: float|None = None, + cache_size: int | None = None, + operation_timeout: float | None = None, + per_row_timeout: float | None = None, + per_request_timeout: float | None = None, revise_on_retry: bool = True, ): self.revise_on_retry = revise_on_retry - self.last_seen_row_key : bytes | None = None - self.emitted_rows : Set[bytes] = set() + self.last_seen_row_key: bytes | None = None + self.emitted_rows: Set[bytes] = set() self.request = request # lock in paramters for retryable wrapper - partial_retryable = functools.partial( + partial_retryable = partial( self.retryable_wrapper, cache_size, per_row_timeout, per_request_timeout, - gapic_fn + gapic_fn, ) retry = retries.AsyncRetry( @@ -86,14 +90,18 @@ async def _generator_to_cache( async for item in input_generator: await cache.put(item) - async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeout, gapic_fn): + async def retryable_wrapper( + self, cache_size, per_row_timeout, per_request_timeout, gapic_fn + ): if self.revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed self.request["rows"] = self._revise_rowset( - self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows + self.request.get("rows", None), + self.last_seen_row_key, + self.emitted_rows, ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) - cache: asyncio.Queue[Row|RequestStats] = asyncio.Queue(max_cache_size) + cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(cache_size) merger = RowMerger() stream_task = asyncio.create_task( self._generator_to_cache(cache, merger.merge_row_stream(new_gapic_stream)) @@ -106,7 +114,10 @@ async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeo # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item - elif isinstance(new_item, Row) 
and new_item.row_key not in self.emitted_rows: + elif ( + isinstance(new_item, Row) + and new_item.row_key not in self.emitted_rows + ): self.last_seen_row_key = new_item.row_key self.emitted_rows.add(new_item.row_key) yield new_item @@ -125,7 +136,10 @@ async def retryable_wrapper(self, cache_size, per_row_timeout, per_request_timeo stream_task.cancel() def _revise_request_rowset( - self, row_set: dict[str, Any] | None, last_seen_row_key: bytes, emitted_rows: Set[bytes] + self, + row_set: dict[str, Any] | None, + last_seen_row_key: bytes, + emitted_rows: Set[bytes], ) -> dict[str, Any]: # if user is doing a whole table scan, start a new one with the last seen key if row_set is None: @@ -149,6 +163,7 @@ def _revise_request_rowset( row_ranges[0].pop("start_key_closed") return {"row_keys": adjusted_keys, "row_ranges": row_ranges} + class RowMerger: """ RowMerger takes in a stream of ReadRows chunks @@ -197,6 +212,7 @@ async def merge_row_stream( # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") + class StateMachine: """ State Machine converts chunks into Rows From d6a323f5c82f843975e3f02c2efbb68ad216fdf1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 15:36:12 -0700 Subject: [PATCH 194/349] added idle timeout --- google/cloud/bigtable/client.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 6f5f01f7d..09ce4a5aa 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -400,7 +400,7 @@ async def read_rows_stream( # - RowMerger.merge_row_stream: parses chunks into rows # - RetryableRowMerger.retryable_wrapper: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsGenerator: adds idle_timeout, moves stats out of stream and into attribute - return ReadRowsGenerator( + generator = ReadRowsGenerator( RetryableRowMerger( request, self.client.read_rows, @@ -410,6 +410,10 @@ async def read_rows_stream( per_request_timeout=per_request_timeout, ) ) + # add idle timeout + if idle_timeout: + generator._start_idle_timer(idle_timeout) + return generator async def read_rows( self, @@ -684,15 +688,36 @@ class ReadRowsGenerator(AsyncIterable[Row]): User-facing async generator for streaming read_rows responses """ - def __init__(self, stream: AsyncGenerator[Row | RequestStats, None]): - self.stream = stream + def __init__(self, stream: RetryableRowMerger): + self.stream: RetryableRowMerger = stream self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() + self.expired = False + self._idle_timeout_task: asyncio.Task | None = None + + async def _start_idle_timer(self, idle_timeout: float): + if self._idle_timeout_task: + await self._idle_timeout_task.cancel() + self._idle_timeout_task = asyncio.create_task( + self._idle_timeout_coroutine(idle_timeout) + ) + + async def _idle_timeout_coroutine(self, idle_timeout:float): + while self.stream.is_active(): + next_timeout = self.last_interaction_time + idle_timeout + await asyncio.sleep(next_timeout - time.time()) + if self.last_interaction_time + idle_timeout < time.time(): + # idle timeout has expired + self.expired = True + self.stream.cancel() + return async def __aiter__(self): return self async def __anext__(self) -> Row: + if self.expired: + raise core_exceptions.DeadlineExceeded("Idle timeout expired") self.last_interaction_time = time.time() 
next_item = await self.stream.__anext__() if isinstance(next_item, RequestStats): From 7b6d1db15547249126856635918207f2774d4c83 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 15:43:16 -0700 Subject: [PATCH 195/349] keep track of last_raised --- google/cloud/bigtable/client.py | 36 +++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 09ce4a5aa..408f312e3 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -683,7 +683,7 @@ async def read_modify_write_row( raise NotImplementedError -class ReadRowsGenerator(AsyncIterable[Row]): +class ReadRowsGenerator(AsyncGenerator[Row, None]): """ User-facing async generator for streaming read_rows responses """ @@ -692,7 +692,7 @@ def __init__(self, stream: RetryableRowMerger): self.stream: RetryableRowMerger = stream self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() - self.expired = False + self.last_raised: Exception | None = None self._idle_timeout_task: asyncio.Task | None = None async def _start_idle_timer(self, idle_timeout: float): @@ -708,20 +708,30 @@ async def _idle_timeout_coroutine(self, idle_timeout:float): await asyncio.sleep(next_timeout - time.time()) if self.last_interaction_time + idle_timeout < time.time(): # idle timeout has expired - self.expired = True - self.stream.cancel() + self.last_raised = DeadlineExceeded("idle timeout expired") + self.stream.close() return async def __aiter__(self): return self + async def __aclose__(self): + await self.stream.close() + self.stream = None + self.last_raised = GeneratorExit("generator closed") + async def __anext__(self) -> Row: - if self.expired: - raise core_exceptions.DeadlineExceeded("Idle timeout expired") - self.last_interaction_time = time.time() - next_item = await self.stream.__anext__() - if isinstance(next_item, RequestStats): - self.request_stats = next_item - return await self.__anext__() - else: - return next_item + if self.last_raised: + raise self.last_raised + try: + self.last_interaction_time = time.time() + next_item = await self.stream.__anext__() + if isinstance(next_item, RequestStats): + self.request_stats = next_item + return await self.__anext__() + else: + return next_item + except Exception as e: + await self.stream.close() + self.last_raised = e + raise e From 733a393b76ad06590b820052d60742b84538f0ec Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 16:04:05 -0700 Subject: [PATCH 196/349] fixed mypy issues --- google/cloud/bigtable/client.py | 44 +++++++++++++---------------- google/cloud/bigtable/row_merger.py | 2 +- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 408f312e3..259916cfc 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -346,19 +346,19 @@ async def read_rows_stream( self, query: ReadRowsQuery | dict[str, Any], *, - cache_size_limit: int | None = None, + cache_size: int | None = None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - ) -> ReadRowsGenerator: + ) -> ReadRowsIterator: """ - Returns a generator to asynchronously stream back row data. + Returns an iterator to asynchronously stream back row data. 
Failed requests within operation_timeout and operation_deadline policies will be retried. By default, row data is streamed eagerly over the network, and fully cached in memory - in the generator, which can be consumed as needed. The size of the generator cache can + in the iterator, which can be consumed as needed. The size of the iterator cache can be configured with cache_size_limit. When the cache is full, the read_rows_stream will pause the network stream until space is available @@ -384,12 +384,12 @@ async def read_rows_stream( a DeadlineExceeded exception, and a retry will be attempted Returns: - - an asynchronous generator that yields rows returned by the query + - an asynchronous iterator that yields rows returned by the query Raises: - DeadlineExceeded: raised after operation timeout will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions from any retries that failed - - IdleTimeout: if generator was abandoned + - IdleTimeout: if iterator was abandoned """ request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.client.table_path(self.table_id) @@ -399,8 +399,8 @@ async def read_rows_stream( # - client.read_rows: outputs raw ReadRowsResponse objects from backend. Has per_request_timeout # - RowMerger.merge_row_stream: parses chunks into rows # - RetryableRowMerger.retryable_wrapper: adds retries, caching, revised requests, per_row_timeout, per_row_timeout - # - ReadRowsGenerator: adds idle_timeout, moves stats out of stream and into attribute - generator = ReadRowsGenerator( + # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute + generator = ReadRowsIterator( RetryableRowMerger( request, self.client.read_rows, @@ -412,7 +412,7 @@ async def read_rows_stream( ) # add idle timeout if idle_timeout: - generator._start_idle_timer(idle_timeout) + await generator._start_idle_timer(idle_timeout) return generator async def read_rows( @@ -462,7 +462,7 @@ async def read_rows_sharded( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - ) -> ReadRowsGenerator: + ) -> ReadRowsIterator: """ Runs a sharded query in parallel @@ -683,7 +683,7 @@ async def read_modify_write_row( raise NotImplementedError -class ReadRowsGenerator(AsyncGenerator[Row, None]): +class ReadRowsIterator(AsyncIterable[Row]): """ User-facing async generator for streaming read_rows responses """ @@ -693,45 +693,39 @@ def __init__(self, stream: RetryableRowMerger): self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self.last_raised: Exception | None = None - self._idle_timeout_task: asyncio.Task | None = None + self._idle_timeout_task: asyncio.Task[None] | None = None async def _start_idle_timer(self, idle_timeout: float): - if self._idle_timeout_task: - await self._idle_timeout_task.cancel() + self.last_interaction_time = time.time() + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() self._idle_timeout_task = asyncio.create_task( self._idle_timeout_coroutine(idle_timeout) ) async def _idle_timeout_coroutine(self, idle_timeout:float): - while self.stream.is_active(): + while self.last_raised is None: next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) - if self.last_interaction_time + idle_timeout < time.time(): + if self.last_interaction_time + idle_timeout < time.time() and self.last_raised is None: # idle timeout has expired - 
self.last_raised = DeadlineExceeded("idle timeout expired") - self.stream.close() - return + self.last_raised = core_exceptions.DeadlineExceeded("idle timeout expired") async def __aiter__(self): return self - async def __aclose__(self): - await self.stream.close() - self.stream = None - self.last_raised = GeneratorExit("generator closed") async def __anext__(self) -> Row: if self.last_raised: raise self.last_raised try: self.last_interaction_time = time.time() - next_item = await self.stream.__anext__() + next_item = await self.stream.__aiter__().__anext__() if isinstance(next_item, RequestStats): self.request_stats = next_item return await self.__anext__() else: return next_item except Exception as e: - await self.stream.close() self.last_raised = e raise e diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 5e37ca86c..38ff894e1 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -95,7 +95,7 @@ async def retryable_wrapper( ): if self.revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed - self.request["rows"] = self._revise_rowset( + self.request["rows"] = self._revise_request_rowset( self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows, From 12807e079df46972416266f26c7a6f4d2aeb85cc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 16:06:27 -0700 Subject: [PATCH 197/349] made idle timeout internal value --- google/cloud/bigtable/client.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 259916cfc..0db34bb8d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -349,7 +349,6 @@ async def read_rows_stream( cache_size: int | None = None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, - idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, ) -> ReadRowsIterator: """ @@ -375,10 +374,6 @@ async def read_rows_stream( longer than per_row_timeout to complete, the ongoing network request will be with a DeadlineExceeded exception, and a retry may be attempted Applies only to the underlying network call. - - idle_timeout: the number of idle seconds before an active generator is marked as - stale and the cache is drained. The idle count is reset each time the generator - is yielded from - raises DeadlineExceeded on future yields - per_request_timeout: the time budget for an individual network request, in seconds. 
If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted @@ -410,9 +405,8 @@ async def read_rows_stream( per_request_timeout=per_request_timeout, ) ) - # add idle timeout - if idle_timeout: - await generator._start_idle_timer(idle_timeout) + # add idle timeout to clear resources if generator is abandoned + await generator._start_idle_timer(600) return generator async def read_rows( @@ -709,7 +703,7 @@ async def _idle_timeout_coroutine(self, idle_timeout:float): await asyncio.sleep(next_timeout - time.time()) if self.last_interaction_time + idle_timeout < time.time() and self.last_raised is None: # idle timeout has expired - self.last_raised = core_exceptions.DeadlineExceeded("idle timeout expired") + self.last_raised = IdleTimeout("idle timeout expired") async def __aiter__(self): return self @@ -729,3 +723,6 @@ async def __anext__(self) -> Row: except Exception as e: self.last_raised = e raise e + +class IdleTimeout(core_exceptions.DeadlineExceeded): + pass From 0e3d32ccecb5ece7963580266b215ef39e02da42 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 16:31:51 -0700 Subject: [PATCH 198/349] combined row merger functions --- google/cloud/bigtable/client.py | 11 +++---- google/cloud/bigtable/row_merger.py | 49 +++++++++++++---------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 0db34bb8d..c1b7f8534 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -40,7 +40,6 @@ from google.cloud.client import _ClientProjectMixin from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigtable.row_merger import RowMerger -from google.cloud.bigtable.row_merger import RetryableRowMerger from google.cloud.bigtable.row_merger import InvalidChunk from google.cloud.bigtable_v2.types import RequestStats @@ -392,11 +391,11 @@ async def read_rows_stream( # read_rows smart retries is implemented using a series of generators: # - client.read_rows: outputs raw ReadRowsResponse objects from backend. 
Has per_request_timeout - # - RowMerger.merge_row_stream: parses chunks into rows - # - RetryableRowMerger.retryable_wrapper: adds retries, caching, revised requests, per_row_timeout, per_row_timeout + # - RowMerger.merge_row_response_stream: parses chunks into rows + # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute generator = ReadRowsIterator( - RetryableRowMerger( + RowMerger( request, self.client.read_rows, cache_size=cache_size, @@ -682,8 +681,8 @@ class ReadRowsIterator(AsyncIterable[Row]): User-facing async generator for streaming read_rows responses """ - def __init__(self, stream: RetryableRowMerger): - self.stream: RetryableRowMerger = stream + def __init__(self, stream: RowMerger): + self.stream: RowMerger = stream self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self.last_raised: Exception | None = None diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 38ff894e1..e752f1d06 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -38,7 +38,17 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" -class RetryableRowMerger: +class RowMerger: + """ + RowMerger takes in a stream of ReadRows chunks + and processes them into a stream of Rows. + + RowMerger can wrap the stream directly, or use a cache to decouple + the producer from the consumer + + RowMerger uses a StateMachine instance to handle the chunk parsing + logic + """ def __init__( self, request: dict[str, Any], @@ -57,7 +67,7 @@ def __init__( # lock in paramters for retryable wrapper partial_retryable = partial( - self.retryable_wrapper, + self.retryable_merge_rows, cache_size, per_row_timeout, per_request_timeout, @@ -90,7 +100,7 @@ async def _generator_to_cache( async for item in input_generator: await cache.put(item) - async def retryable_wrapper( + async def retryable_merge_rows( self, cache_size, per_row_timeout, per_request_timeout, gapic_fn ): if self.revise_on_retry and self.last_seen_row_key is not None: @@ -102,9 +112,9 @@ async def retryable_wrapper( ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(cache_size) - merger = RowMerger() + state_machine = StateMachine() stream_task = asyncio.create_task( - self._generator_to_cache(cache, merger.merge_row_stream(new_gapic_stream)) + self._generator_to_cache(cache, self.merge_row_response_stream(new_gapic_stream, state_machine)) ) try: # read from state machine and push into cache @@ -135,8 +145,8 @@ async def retryable_wrapper( finally: stream_task.cancel() + @staticmethod def _revise_request_rowset( - self, row_set: dict[str, Any] | None, last_seen_row_key: bytes, emitted_rows: Set[bytes], @@ -163,24 +173,9 @@ def _revise_request_rowset( row_ranges[0].pop("start_key_closed") return {"row_keys": adjusted_keys, "row_ranges": row_ranges} - -class RowMerger: - """ - RowMerger takes in a stream of ReadRows chunks - and processes them into a stream of Rows. 
- - RowMerger can wrap the stream directly, or use a cache to decouple - the producer from the consumer - - RowMerger uses a StateMachine instance to handle the chunk parsing - logic - """ - - def __init__(self): - self.state_machine: StateMachine = StateMachine() - - async def merge_row_stream( - self, request_generator: AsyncIterable[ReadRowsResponse] + @staticmethod + async def merge_row_response_stream( + request_generator: AsyncIterable[ReadRowsResponse], state_machine: StateMachine ) -> AsyncGenerator[Row | RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows @@ -199,16 +194,16 @@ async def merge_row_stream( last_scanned = response_pb.last_scanned_row_key # if the server sends a scan heartbeat, notify the state machine. if last_scanned: - yield self.state_machine.handle_last_scanned_row(last_scanned) + yield state_machine.handle_last_scanned_row(last_scanned) # process new chunks through the state machine. for chunk in response_pb.chunks: - complete_row = self.state_machine.handle_chunk(chunk) + complete_row = state_machine.handle_chunk(chunk) if complete_row is not None: yield complete_row # yield request stats if present if response_pb.stats: yield response_pb.stats - if not self.state_machine.is_terminal_state(): + if not state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") From 5b055b4ba3365f95787cc6e65a0f1df67663ead3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 16:39:53 -0700 Subject: [PATCH 199/349] made adjustments to RowMerger --- google/cloud/bigtable/client.py | 11 +++++++---- google/cloud/bigtable/row_merger.py | 17 ++++++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c1b7f8534..af54754dd 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -397,7 +397,7 @@ async def read_rows_stream( generator = ReadRowsIterator( RowMerger( request, - self.client.read_rows, + self.client, cache_size=cache_size, operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, @@ -696,18 +696,20 @@ async def _start_idle_timer(self, idle_timeout: float): self._idle_timeout_coroutine(idle_timeout) ) - async def _idle_timeout_coroutine(self, idle_timeout:float): + async def _idle_timeout_coroutine(self, idle_timeout: float): while self.last_raised is None: next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) - if self.last_interaction_time + idle_timeout < time.time() and self.last_raised is None: + if ( + self.last_interaction_time + idle_timeout < time.time() + and self.last_raised is None + ): # idle timeout has expired self.last_raised = IdleTimeout("idle timeout expired") async def __aiter__(self): return self - async def __anext__(self) -> Row: if self.last_raised: raise self.last_raised @@ -723,5 +725,6 @@ async def __anext__(self) -> Row: self.last_raised = e raise e + class IdleTimeout(core_exceptions.DeadlineExceeded): pass diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index e752f1d06..a48c57f44 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -15,6 +15,7 @@ from __future__ import annotations from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse +from google.cloud.bigtable_v2.services.bigtable.async_client import 
BigtableAsyncClient from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable.row import Row, Cell, _LastScannedRow import asyncio @@ -38,7 +39,7 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" -class RowMerger: +class RowMerger(AsyncIterable[Row]): """ RowMerger takes in a stream of ReadRows chunks and processes them into a stream of Rows. @@ -49,10 +50,11 @@ class RowMerger: RowMerger uses a StateMachine instance to handle the chunk parsing logic """ + def __init__( self, request: dict[str, Any], - gapic_fn, + client: BigtableAsyncClient, *, cache_size: int | None = None, operation_timeout: float | None = None, @@ -71,7 +73,7 @@ def __init__( cache_size, per_row_timeout, per_request_timeout, - gapic_fn, + client.read_rows, ) retry = retries.AsyncRetry( @@ -89,7 +91,10 @@ def __init__( self.retryable_stream = retry(partial_retryable) async def __aiter__(self): - return self.retryable_stream.__aiter__() + return self + + async def __anext__(self): + return await self.retryable_stream().__anext__() async def _generator_to_cache( self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] @@ -114,7 +119,9 @@ async def retryable_merge_rows( cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(cache_size) state_machine = StateMachine() stream_task = asyncio.create_task( - self._generator_to_cache(cache, self.merge_row_response_stream(new_gapic_stream, state_machine)) + self._generator_to_cache( + cache, self.merge_row_response_stream(new_gapic_stream, state_machine) + ) ) try: # read from state machine and push into cache From dbf19c9136d0ce630c39a05ab4911ae6f5f3211e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 17:14:20 -0700 Subject: [PATCH 200/349] holds a gapic client instead of inherits from it --- google/cloud/bigtable/client.py | 46 +++++++++++++++------------------ tests/unit/test_client.py | 44 ++++++++++++++++++------------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e378a3a7c..8931d21f4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -29,7 +29,7 @@ from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) -from google.cloud.client import _ClientProjectMixin +from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError @@ -48,7 +48,7 @@ from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule -class BigtableDataClient(BigtableAsyncClient, _ClientProjectMixin): +class BigtableDataClient(ClientWithProject): def __init__( self, *, @@ -95,20 +95,22 @@ def __init__( client_options = cast( Optional[client_options_lib.ClientOptions], client_options ) - mixin_args = {"project": project, "credentials": credentials} - # support google-api-core <=1.5.0, which does not have credentials - if "credentials" not in _ClientProjectMixin.__init__.__code__.co_varnames: - mixin_args.pop("credentials") # initialize client - _ClientProjectMixin.__init__(self, **mixin_args) - # raises RuntimeError if called outside of an async run loop context - BigtableAsyncClient.__init__( + ClientWithProject.__init__( self, + credentials=credentials, + project=project, + client_options=client_options, + ) + self._gapic_client = BigtableAsyncClient( transport=transport_str, credentials=credentials, client_options=client_options, client_info=client_info, 
) + self.transport = cast( + PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport + ) # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() # attempt to start background tasks @@ -140,14 +142,6 @@ def start_background_channel_refresh(self) -> None: ) self._channel_refresh_tasks.append(refresh_task) - @property - def transport(self) -> PooledBigtableGrpcAsyncIOTransport: - """Returns the transport used by the client instance. - Returns: - BigtableTransport: The transport used by the client instance. - """ - return cast(PooledBigtableGrpcAsyncIOTransport, self._client.transport) - async def close(self, timeout: float = 2.0): """ Cancel all background tasks @@ -159,12 +153,6 @@ async def close(self, timeout: float = 2.0): await self.transport.close() self._channel_refresh_tasks = [] - async def __aexit__(self, exc_type, exc_val, exc_tb): - """ - Cleanly close context manager on exit - """ - await self.close() - async def _ping_and_warm_instances( self, channel: grpc.aio.Channel ) -> list[GoogleAPICallError | None]: @@ -232,7 +220,7 @@ async def register_instance(self, instance_id: str): requests, and new channels will be warmed for each registered instance Channels will not be refreshed unless at least one instance is registered """ - instance_name = self.instance_path(self.project, instance_id) + instance_name = self._gapic_client.instance_path(self.project, instance_id) if instance_name not in self._active_instances: self._active_instances.add(instance_name) if self._channel_refresh_tasks: @@ -256,7 +244,7 @@ async def remove_instance_registration(self, instance_id: str) -> bool: Returns: - True if instance was removed """ - instance_name = self.instance_path(self.project, instance_id) + instance_name = self._gapic_client.instance_path(self.project, instance_id) try: self._active_instances.remove(instance_name) return True @@ -282,6 +270,14 @@ def get_table( """ return Table(self, instance_id, table_id, app_profile_id) + async def __aenter__(self): + self.start_background_channel_refresh() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + await self._gapic_client.__aexit__(exc_type, exc_val, exc_tb) + class Table: """ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ecbcb63d1..2481548b1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -68,7 +68,7 @@ async def test_ctor_super_inits(): from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) - from google.cloud.client import _ClientProjectMixin + from google.cloud.client import ClientWithProject from google.api_core import client_options as client_options_lib project = "project-id" @@ -78,11 +78,9 @@ async def test_ctor_super_inits(): options_parsed = client_options_lib.from_dict(client_options) transport_str = f"pooled_grpc_asyncio_{pool_size}" with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - with mock.patch.object( - _ClientProjectMixin, "__init__" - ) as client_project_mixin_init: - client_project_mixin_init.__code__ = mock.Mock() - client_project_mixin_init.__code__.co_varnames = "credentials" + bigtable_client_init.return_value = None + with mock.patch.object(ClientWithProject, "__init__") as client_project_init: + client_project_init.return_value = None try: _make_one( project=project, @@ -99,10 +97,11 @@ async def test_ctor_super_inits(): assert kwargs["credentials"] == credentials assert 
kwargs["client_options"] == options_parsed # test mixin superclass init was called - assert client_project_mixin_init.call_count == 1 - kwargs = client_project_mixin_init.call_args[1] + assert client_project_init.call_count == 1 + kwargs = client_project_init.call_args[1] assert kwargs["project"] == project assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed @pytest.mark.asyncio @@ -114,17 +113,22 @@ async def test_ctor_dict_options(): from google.cloud.bigtable.client import BigtableDataClient client_options = {"api_endpoint": "foo.bar:1234"} + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + try: + _make_one(client_options=client_options) + except TypeError: + pass + bigtable_client_init.assert_called_once() + kwargs = bigtable_client_init.call_args[1] + called_options = kwargs["client_options"] + assert called_options.api_endpoint == "foo.bar:1234" + assert isinstance(called_options, ClientOptions) with mock.patch.object( BigtableDataClient, "start_background_channel_refresh" ) as start_background_refresh: - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - _make_one(client_options=client_options) - bigtable_client_init.assert_called_once() - kwargs = bigtable_client_init.call_args[1] - called_options = kwargs["client_options"] - assert called_options.api_endpoint == "foo.bar:1234" - assert isinstance(called_options, ClientOptions) - start_background_refresh.assert_called_once() + client = _make_one(client_options=client_options) + start_background_refresh.assert_called_once() + await client.close() @pytest.mark.asyncio @@ -553,7 +557,9 @@ async def test_get_table(): assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client - full_instance_name = client.instance_path(client.project, expected_instance_id) + full_instance_name = client._gapic_client.instance_path( + client.project, expected_instance_id + ) assert full_instance_name in client._active_instances await client.close() @@ -672,7 +678,9 @@ async def test_table_ctor(): assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client - full_instance_name = client.instance_path(client.project, expected_instance_id) + full_instance_name = client._gapic_client.instance_path( + client.project, expected_instance_id + ) assert full_instance_name in client._active_instances # ensure task reaches completion await table._register_instance_task From 88f14f65b39cdc463678a122eb9a88d7be019171 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 17:18:37 -0700 Subject: [PATCH 201/349] don't emit _LastScannedRows --- google/cloud/bigtable/row_merger.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a48c57f44..5dc43e22d 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -136,8 +136,11 @@ async def retryable_merge_rows( and new_item.row_key not in self.emitted_rows ): self.last_seen_row_key = new_item.row_key - self.emitted_rows.add(new_item.row_key) - yield new_item + # don't yeild _LastScannedRow markers; they + # should only update last_seen_row_key + if not isinstance(new_item, _LastScannedRow): + self.emitted_rows.add(new_item.row_key) + yield new_item else: # wait for either the stream to finish, or a new item to enter the cache 
get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) From 9f15a6a8d310cd4e12578e64481f2c2f2cc956e9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 18:23:59 -0700 Subject: [PATCH 202/349] fixed type issues --- google/cloud/bigtable/client.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1034b5003..29fcdb1be 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -20,7 +20,6 @@ Any, Optional, AsyncIterable, - AsyncGenerator, Set, TYPE_CHECKING, ) @@ -40,11 +39,9 @@ from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigtable.row_merger import RowMerger -from google.cloud.bigtable.row_merger import InvalidChunk from google.cloud.bigtable_v2.types import RequestStats import google.auth.credentials -from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions import google.auth._default from google.api_core import client_options as client_options_lib @@ -53,7 +50,6 @@ from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.row import Row - from google.cloud.bigtable.row import _LastScannedRow from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter @@ -322,8 +318,13 @@ def __init__( - RuntimeError if called outside of an async run loop context """ self.client = client - self.instance = instance_id - self.table_id = table_id + + self.instance_path = self.client._gapic_client.instance_path( + self.client.project, instance_id + ) + self.table_path = self.client._gapic_client.table_path( + self.client.project, instance_id, table_id + ) self.app_profile_id = app_profile_id # raises RuntimeError if called outside of an async run loop context try: @@ -382,7 +383,7 @@ async def read_rows_stream( - IdleTimeout: if iterator was abandoned """ request = query._to_dict() if isinstance(query, ReadRowsQuery) else query - request["table_name"] = self.client.table_path(self.table_id) + request["table_name"] = self.table_path request["app_profile_id"] = self.app_profile_id # read_rows smart retries is implemented using a series of generators: @@ -393,7 +394,7 @@ async def read_rows_stream( generator = ReadRowsIterator( RowMerger( request, - self.client, + self.client._gapic_client, cache_size=cache_size, operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, From b3c32b091dba2e431cdf632b143b978c0831a740 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 4 Apr 2023 18:52:00 -0700 Subject: [PATCH 203/349] got tests passing --- google/cloud/bigtable/client.py | 2 +- google/cloud/bigtable/read_rows_query.py | 2 +- google/cloud/bigtable/row_merger.py | 4 ++-- tests/unit/test_client.py | 22 ++++++++++++++++------ tests/unit/test_read_rows_acceptance.py | 20 ++++++++++---------- 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 29fcdb1be..c6b77a5fc 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -45,11 +45,11 @@ from google.api_core import exceptions as core_exceptions import google.auth._default from google.api_core import client_options as client_options_lib +from 
google.cloud.bigtable.row import Row if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher - from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 9fd349d5f..559b47f04 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -14,7 +14,7 @@ # from __future__ import annotations from typing import TYPE_CHECKING, Any -from .row_response import row_key +from .row import row_key from dataclasses import dataclass from google.cloud.bigtable.row_filters import RowFilter diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 5dc43e22d..20705a02b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -211,8 +211,8 @@ async def merge_row_response_stream( if complete_row is not None: yield complete_row # yield request stats if present - if response_pb.stats: - yield response_pb.stats + if row_response.request_stats: + yield response_pb.request_stats if not state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2481548b1..3999a3429 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -553,13 +553,16 @@ async def test_get_table(): ) await asyncio.sleep(0) assert isinstance(table, Table) - assert table.table_id == expected_table_id - assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client full_instance_name = client._gapic_client.instance_path( client.project, expected_instance_id ) + full_table_name = client._gapic_client.table_path( + client.project, expected_instance_id, expected_table_id + ) + assert table.instance_path == full_instance_name + assert table.table_path == full_table_name assert full_instance_name in client._active_instances await client.close() @@ -674,13 +677,16 @@ async def test_table_ctor(): expected_app_profile_id, ) await asyncio.sleep(0) - assert table.table_id == expected_table_id - assert table.instance == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client full_instance_name = client._gapic_client.instance_path( client.project, expected_instance_id ) + full_table_name = client._gapic_client.table_path( + client.project, expected_instance_id, expected_table_id + ) + assert table.instance_path == full_instance_name + assert table.table_path == full_table_name assert full_instance_name in client._active_instances # ensure task reaches completion await table._register_instance_task @@ -695,10 +701,14 @@ def test_table_ctor_sync(): from google.cloud.bigtable.client import Table client = mock.Mock() + client._gapic_client.table_path.return_value = "table-path" + client._gapic_client.instance_path.return_value = "instance-path" with pytest.warns(RuntimeWarning) as warnings: table = Table(client, "instance-id", "table-id") assert "event loop" in str(warnings[0].message) - assert table.table_id == "table-id" - assert table.instance == "instance-id" + assert table.table_path == 
"table-path" + client._gapic_client.table_path.assert_called_once() + assert table.instance_path == "instance-path" + client._gapic_client.instance_path.assert_called_once() assert table.app_profile_id is None assert table.client is client diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 15068d13c..65e90ee6f 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -5,7 +5,7 @@ from google.cloud.bigtable_v2 import ReadRowsResponse -from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk +from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk, StateMachine from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile @@ -47,9 +47,9 @@ async def _scenerio_stream(): yield ReadRowsResponse(chunks=[chunk]) try: - merger = RowMerger() + state = StateMachine() results = [] - async for row in merger.merge_row_stream(_scenerio_stream()): + async for row in RowMerger.merge_row_response_stream(_scenerio_stream(), state): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, @@ -60,8 +60,8 @@ async def _scenerio_stream(): label=cell.labels[0] if cell.labels else "", ) results.append(cell_result) - if not merger.state_machine.is_terminal_state(): - raise InvalidChunk("merger has partial frame after reading") + if not state.is_terminal_state(): + raise InvalidChunk("state machine has partial frame after reading") except InvalidChunk: results.append(ReadRowsTest.Result(error=True)) for expected, actual in zip_longest(test_case.results, results): @@ -73,10 +73,10 @@ async def test_out_of_order_rows(): async def _row_stream(): yield ReadRowsResponse(last_scanned_row_key=b"a") - merger = RowMerger() - merger.state_machine.last_seen_row_key = b"a" + state = StateMachine() + state.last_seen_row_key = b"a" with pytest.raises(InvalidChunk): - async for _ in merger.merge_row_stream(_row_stream()): + async for _ in RowMerger.merge_row_response_stream(_row_stream(), state): pass @@ -231,8 +231,8 @@ async def _process_chunks(*chunks): async def _row_stream(): yield ReadRowsResponse(chunks=chunks) - merger = RowMerger() + state = StateMachine() results = [] - async for row in merger.merge_row_stream(_row_stream()): + async for row in RowMerger.merge_row_response_stream(_row_stream(), state): results.append(row) return results From 770d9f552c482405454b4933f0e139bbf47bb858 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 09:57:55 -0700 Subject: [PATCH 204/349] added comments --- google/cloud/bigtable/row_merger.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 20705a02b..f5e818536 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -41,14 +41,15 @@ class InvalidChunk(RuntimeError): class RowMerger(AsyncIterable[Row]): """ - RowMerger takes in a stream of ReadRows chunks - and processes them into a stream of Rows. + RowMerger handles the logic of merging chunks from a ReadRowsResponse stream + into a stream of Row objects. - RowMerger can wrap the stream directly, or use a cache to decouple - the producer from the consumer + RowMerger.merge_row_response_stream takes in a stream of ReadRowsResponse + and handles turns them into a stream of Row objects using an internal + StateMachine. 
- RowMerger uses a StateMachine instance to handle the chunk parsing - logic + RowMerger(request, client) handles row merging logic end-to-end, including + performing retries on stream errors. """ def __init__( From 9f3e0c561bd7dcaa1902d086112d5dc43d08c1f1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 10:51:31 -0700 Subject: [PATCH 205/349] added comment --- google/cloud/bigtable/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 8931d21f4..9969b0458 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -137,6 +137,7 @@ def start_background_channel_refresh(self) -> None: for channel_idx in range(len(self.transport._grpc_channel._pool)): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) if sys.version_info >= (3, 8): + # task names supported in Python 3.8+ refresh_task.set_name( f"{self.__class__.__name__} channel refresh {channel_idx}" ) From a0620eab8cadaf6aab6ef888eb29c662b9463744 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 10:52:55 -0700 Subject: [PATCH 206/349] added random noise to refresh intervals --- google/cloud/bigtable/client.py | 18 +++++++++++---- tests/unit/test_client.py | 40 ++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 9969b0458..9af2a7749 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -22,6 +22,7 @@ import time import warnings import sys +import random from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -176,7 +177,8 @@ async def _ping_and_warm_instances( async def _manage_channel( self, channel_idx: int, - refresh_interval: float = 60 * 45, + refresh_interval_min: float = 60 * 35, + refresh_interval_max: float = 60 * 45, grace_period: float = 60 * 10, ) -> None: """ @@ -189,11 +191,18 @@ async def _manage_channel( Args: channel_idx: index of the channel in the transport's channel pool - refresh_interval: interval before initiating refresh process in seconds + refresh_interval_min: minimum interval before initiating refresh + process in seconds. Actual interval will be a random value + between `refresh_interval_min` and `refresh_interval_max` + refresh_interval_max: maximum interval before initiating refresh + process in seconds. 
Actual interval will be a random value + between `refresh_interval_min` and `refresh_interval_max` grace_period: time to allow previous channel to serve existing requests before closing, in seconds """ - first_refresh = self._channel_init_time + refresh_interval + first_refresh = self._channel_init_time + random.uniform( + refresh_interval_min, refresh_interval_max + ) next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately @@ -211,7 +220,8 @@ async def _manage_channel( channel_idx, grace=grace_period, swap_sleep=10, new_channel=new_channel ) # subtract the time spent waiting for the channel to be replaced - next_sleep = refresh_interval - (time.time() - start_timestamp) + next_refresh = random.uniform(refresh_interval_min, refresh_interval_max) + next_sleep = next_refresh - (time.time() - start_timestamp) async def register_instance(self, instance_id: str): """ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2481548b1..ebe26c32e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -330,7 +330,7 @@ async def test__manage_channel_first_sleep(refresh_interval, wait_time, expected try: client = _make_one(project="project-id") client._channel_init_time = -wait_time - await client._manage_channel(0, refresh_interval) + await client._manage_channel(0, refresh_interval, refresh_interval) except asyncio.CancelledError: pass sleep.assert_called_once() @@ -378,7 +378,7 @@ async def test__manage_channel_ping_and_warm(): type(_make_one()), "_ping_and_warm_instances" ) as ping_and_warm: try: - await client._manage_channel(0, 0) + await client._manage_channel(0, 0, 0) except asyncio.CancelledError: pass ping_and_warm.assert_called_once_with(new_channel) @@ -389,7 +389,7 @@ async def test__manage_channel_ping_and_warm(): @pytest.mark.parametrize( "refresh_interval, num_cycles, expected_sleep", [ - (None, 1, 60 * 45), + (None, 1, 60 * 35), (10, 10, 100), (10, 1, 10), ], @@ -397,8 +397,11 @@ async def test__manage_channel_ping_and_warm(): async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sleep): # make sure that sleeps work as expected import time + import random channel_idx = 1 + random.uniform = mock.Mock() + random.uniform.side_effect = lambda min_, max_: min_ with mock.patch.object(time, "time") as time: time.return_value = 0 with mock.patch.object(asyncio, "sleep") as sleep: @@ -408,7 +411,7 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle try: client = _make_one(project="project-id") if refresh_interval is not None: - await client._manage_channel(channel_idx, refresh_interval) + await client._manage_channel(channel_idx, refresh_interval, refresh_interval) else: await client._manage_channel(channel_idx) except asyncio.CancelledError: @@ -420,6 +423,32 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" await client.close() +@pytest.mark.asyncio +async def test__manage_channel_random(): + import random + with mock.patch.object(asyncio, "sleep") as sleep: + with mock.patch.object(random, "uniform") as uniform: + uniform.return_value = 0 + try: + uniform.side_effect = asyncio.CancelledError + client = _make_one(project="project-id", pool_size=1) + except asyncio.CancelledError: + uniform.side_effect = None + uniform.reset_mock() + sleep.reset_mock() + min_val = 200 + max_val = 205 + uniform.side_effect = lambda min_, 
max_: min_ + sleep.side_effect = [None, None, asyncio.CancelledError] + try: + await client._manage_channel(0, min_val, max_val) + except asyncio.CancelledError: + pass + assert uniform.call_count == 2 + uniform_args = [call[0] for call in uniform.call_args_list] + for found_min, found_max in uniform_args: + assert found_min == min_val + assert found_max == max_val @pytest.mark.asyncio @pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) @@ -451,7 +480,8 @@ async def test__manage_channel_refresh(num_cycles): try: await client._manage_channel( channel_idx, - refresh_interval=expected_refresh, + refresh_interval_min=expected_refresh, + refresh_interval_max=expected_refresh, grace_period=expected_grace, ) except asyncio.CancelledError: From 4f5ed465809d9f8377b7153d45f4c5f9ba405df1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 11:03:20 -0700 Subject: [PATCH 207/349] improving comments; clean up --- google/cloud/bigtable/row_merger.py | 32 +++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index f5e818536..80823aeb4 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -63,18 +63,18 @@ def __init__( per_request_timeout: float | None = None, revise_on_retry: bool = True, ): - self.revise_on_retry = revise_on_retry self.last_seen_row_key: bytes | None = None self.emitted_rows: Set[bytes] = set() self.request = request # lock in paramters for retryable wrapper partial_retryable = partial( + client.read_rows, self.retryable_merge_rows, cache_size, per_row_timeout, per_request_timeout, - client.read_rows, + revise_on_retry, ) retry = retries.AsyncRetry( @@ -97,8 +97,9 @@ async def __aiter__(self): async def __anext__(self): return await self.retryable_stream().__anext__() + @staticmethod async def _generator_to_cache( - self, cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] + cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] ) -> None: """ Helper function to push items from an async generator into a cache @@ -107,11 +108,24 @@ async def _generator_to_cache( await cache.put(item) async def retryable_merge_rows( - self, cache_size, per_row_timeout, per_request_timeout, gapic_fn - ): - if self.revise_on_retry and self.last_seen_row_key is not None: + self, gapic_fn, cache_size, per_row_timeout, per_request_timeout, revise_on_retry + ) -> AsyncGenerator[Row | RequestStats, None]: + """ + Retryable wrapper for merge_rows. This function is called each time + a retry is attempted. 
+ + Some fresh state is created on each retry: + - grpc network stream + - cache for the stream + - state machine to hold merge chunks received from stream + Some state is shared between retries: + - last_seen_row_key and emitted_rows are used to ensure that + duplicate rows are not emitted + - request is stored and (optionally) modified on each retry + """ + if revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed - self.request["rows"] = self._revise_request_rowset( + self.request["rows"] = RowMerger._revise_request_rowset( self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows, @@ -120,8 +134,8 @@ async def retryable_merge_rows( cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(cache_size) state_machine = StateMachine() stream_task = asyncio.create_task( - self._generator_to_cache( - cache, self.merge_row_response_stream(new_gapic_stream, state_machine) + RowMerger._generator_to_cache( + cache, RowMerger.merge_row_response_stream(new_gapic_stream, state_machine) ) ) try: From c169ba87f5a48a383dac4b6f45a2c3b68c3eedf4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 11:16:38 -0700 Subject: [PATCH 208/349] fixed param order --- google/cloud/bigtable/row_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 80823aeb4..6d8c857e2 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -69,8 +69,8 @@ def __init__( # lock in paramters for retryable wrapper partial_retryable = partial( - client.read_rows, self.retryable_merge_rows, + client.read_rows, cache_size, per_row_timeout, per_request_timeout, From 9ec3697f124abfa8aa6bcb9183768e433376e761 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 12:29:22 -0700 Subject: [PATCH 209/349] working on getting end-to-end read_rows working --- google/cloud/bigtable/client.py | 28 ++++++------ google/cloud/bigtable/row_merger.py | 26 +++++------ tests/unit/test_client_read_rows.py | 68 +++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 29 deletions(-) create mode 100644 tests/unit/test_client_read_rows.py diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c6b77a5fc..8cf40be1a 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -46,11 +46,11 @@ import google.auth._default from google.api_core import client_options as client_options_lib from google.cloud.bigtable.row import Row +from google.cloud.bigtable.read_rows_query import ReadRowsQuery if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher - from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule @@ -391,19 +391,19 @@ async def read_rows_stream( # - RowMerger.merge_row_response_stream: parses chunks into rows # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute - generator = ReadRowsIterator( - RowMerger( - request, - self.client._gapic_client, - cache_size=cache_size, - 
operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, - per_request_timeout=per_request_timeout, - ) + row_merger = RowMerger() + row_merge_gen = await row_merger.start_row_merge( + request, + self.client._gapic_client, + cache_size=cache_size, + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, ) + output_generator = ReadRowsIterator(row_merge_gen) # add idle timeout to clear resources if generator is abandoned - await generator._start_idle_timer(600) - return generator + await output_generator._start_idle_timer(600) + return output_generator async def read_rows( self, @@ -704,7 +704,7 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): # idle timeout has expired self.last_raised = IdleTimeout("idle timeout expired") - async def __aiter__(self): + def __aiter__(self): return self async def __anext__(self) -> Row: @@ -712,7 +712,7 @@ async def __anext__(self) -> Row: raise self.last_raised try: self.last_interaction_time = time.time() - next_item = await self.stream.__aiter__().__anext__() + next_item = await self.stream.__anext__() if isinstance(next_item, RequestStats): self.request_stats = next_item return await self.__anext__() diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 6d8c857e2..5e16f2ec7 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -39,7 +39,7 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" -class RowMerger(AsyncIterable[Row]): +class RowMerger(): """ RowMerger handles the logic of merging chunks from a ReadRowsResponse stream into a stream of Row objects. @@ -52,7 +52,11 @@ class RowMerger(AsyncIterable[Row]): performing retries on stream errors. """ - def __init__( + def __init__(self): + self.last_seen_row_key: bytes | None = None + self.emitted_rows: Set[bytes] = set() + + async def start_row_merge( self, request: dict[str, Any], client: BigtableAsyncClient, @@ -62,11 +66,10 @@ def __init__( per_row_timeout: float | None = None, per_request_timeout: float | None = None, revise_on_retry: bool = True, - ): - self.last_seen_row_key: bytes | None = None - self.emitted_rows: Set[bytes] = set() + ) -> AsyncGenerator[Row|RequestStats, None]: + if cache_size is None: + cache_size = 0 self.request = request - # lock in paramters for retryable wrapper partial_retryable = partial( self.retryable_merge_rows, @@ -76,7 +79,6 @@ def __init__( per_request_timeout, revise_on_retry, ) - retry = retries.AsyncRetry( predicate=retries.if_exception_type( InvalidChunk, @@ -89,13 +91,7 @@ def __init__( maximum=1, is_generator=True, ) - self.retryable_stream = retry(partial_retryable) - - async def __aiter__(self): - return self - - async def __anext__(self): - return await self.retryable_stream().__anext__() + return retry(partial_retryable)() @staticmethod async def _generator_to_cache( @@ -108,7 +104,7 @@ async def _generator_to_cache( await cache.put(item) async def retryable_merge_rows( - self, gapic_fn, cache_size, per_row_timeout, per_request_timeout, revise_on_retry + self, gapic_fn, cache_size, per_row_timeout, per_request_timeout, revise_on_retry ) -> AsyncGenerator[Row | RequestStats, None]: """ Retryable wrapper for merge_rows. 
This function is called each time diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py new file mode 100644 index 000000000..3dd6e467c --- /dev/null +++ b/tests/unit/test_client_read_rows.py @@ -0,0 +1,68 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import asyncio + +import pytest + +from google.cloud.bigtable_v2.types import ReadRowsResponse +from google.cloud.bigtable.read_rows_query import ReadRowsQuery + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore + + +def _make_client(*args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient(*args, **kwargs) + + +def _make_chunk(*args, **kwargs): + from google.cloud.bigtable_v2 import ReadRowsResponse + + kwargs["row_key"] = kwargs.get("row_key", b"row_key") + kwargs["family_name"] = kwargs.get("family_name", "family_name") + kwargs["qualifier"] = kwargs.get("qualifier", b"qualifier") + kwargs["value"] = kwargs.get("value", b"value") + kwargs["commit_row"] = kwargs.get("commit_row", True) + + return ReadRowsResponse.CellChunk(*args, **kwargs) + +async def _make_gapic_stream(chunk_list: list[ReadRowsResponse]): + from google.cloud.bigtable_v2 import ReadRowsResponse + async def inner(): + for chunk in chunk_list: + yield ReadRowsResponse(chunks=[chunk]) + return inner() + +@pytest.mark.asyncio +async def test_read_rows_stream(): + client = _make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk()] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) + gen = await table.read_rows_stream(query, operation_timeout=3) + breakpoint() + async for row in gen: + print(row) + await client.close() From b6873e88b36e0c79c1b6bbf37eefb79372eba8f9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 12:47:38 -0700 Subject: [PATCH 210/349] fixed issue in pulling from cache --- google/cloud/bigtable/row_merger.py | 24 ++++++++++++++++-------- tests/unit/test_client_read_rows.py | 1 - 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 5e16f2ec7..7ca987151 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -137,8 +137,24 @@ async def retryable_merge_rows( try: # read from state machine and push into cache while not stream_task.done() or not cache.empty(): + new_item = None if not cache.empty(): new_item = await cache.get() + else: + # wait for either the stream to finish, or a new item to enter the cache + get_from_cache = asyncio.create_task(cache.get()) + get_from_cache_w_timeout = asyncio.wait_for( + 
get_from_cache, per_row_timeout + ) + first_finish = asyncio.wait( + [stream_task, get_from_cache_w_timeout], + return_when=asyncio.FIRST_COMPLETED, + ) + await asyncio.wait_for(first_finish, per_row_timeout) + if get_from_cache.done(): + new_item = get_from_cache.result() + # if we found an item this loop, yield it + if new_item is not None: # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item @@ -152,14 +168,6 @@ async def retryable_merge_rows( if not isinstance(new_item, _LastScannedRow): self.emitted_rows.add(new_item.row_key) yield new_item - else: - # wait for either the stream to finish, or a new item to enter the cache - get_from_cache = asyncio.wait_for(cache.get(), per_row_timeout) - first_finish = asyncio.wait( - [stream_task, get_from_cache], - return_when=asyncio.FIRST_COMPLETED, - ) - await asyncio.wait_for(first_finish, per_row_timeout) # stream and cache are complete. if there's an exception, raise it if stream_task.exception(): raise cast(Exception, stream_task.exception()) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 3dd6e467c..1089aa5c7 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -62,7 +62,6 @@ async def test_read_rows_stream(): with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) gen = await table.read_rows_stream(query, operation_timeout=3) - breakpoint() async for row in gen: print(row) await client.close() From 2facc79b2550f6233293578c3dcb22fb56f2dac6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 12:50:38 -0700 Subject: [PATCH 211/349] added timeout to results generator --- google/cloud/bigtable/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 8cf40be1a..63ce8b0d4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -692,6 +692,8 @@ async def _start_idle_timer(self, idle_timeout: float): self._idle_timeout_task = asyncio.create_task( self._idle_timeout_coroutine(idle_timeout) ) + if sys.version_info >= (3, 8): + self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" async def _idle_timeout_coroutine(self, idle_timeout: float): while self.last_raised is None: @@ -720,6 +722,7 @@ async def __anext__(self) -> Row: return next_item except Exception as e: self.last_raised = e + self._idle_timeout_task.cancel() raise e From ee826bb6c3304af758c6861c08ad0d2f6913ab5b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 14:05:20 -0700 Subject: [PATCH 212/349] added acceptance tests for read_rows --- tests/unit/test_client_read_rows.py | 2 ++ tests/unit/test_read_rows_acceptance.py | 46 ++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 1089aa5c7..58226174f 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -46,6 +46,7 @@ def _make_chunk(*args, **kwargs): return ReadRowsResponse.CellChunk(*args, **kwargs) + async def _make_gapic_stream(chunk_list: list[ReadRowsResponse]): from google.cloud.bigtable_v2 import ReadRowsResponse async def inner(): @@ -53,6 +54,7 @@ async def inner(): yield ReadRowsResponse(chunks=[chunk]) return inner() + @pytest.mark.asyncio async def test_read_rows_stream(): client = _make_client() 
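The cache-pull fix in PATCH 210 above is easier to follow in isolation: the consumer drains an asyncio.Queue fed by a producer task, and when the queue is empty it races queue.get() against the producer with asyncio.wait(..., return_when=FIRST_COMPLETED), bounding the wait with asyncio.wait_for. The sketch below is a minimal, self-contained illustration of that idiom only; the names produce/consume are placeholders and nothing here is taken verbatim from the library.

import asyncio

async def consume(queue: asyncio.Queue, producer_task: asyncio.Task, per_item_timeout: float = 1.0) -> list:
    # Drain the queue until the producer is finished and the queue is empty.
    items = []
    while not producer_task.done() or not queue.empty():
        if not queue.empty():
            items.append(await queue.get())
            continue
        # queue is empty: wait for either a new item or producer completion,
        # but never longer than per_item_timeout (mirrors per_row_timeout)
        get_task = asyncio.create_task(queue.get())
        race = asyncio.wait([producer_task, get_task], return_when=asyncio.FIRST_COMPLETED)
        await asyncio.wait_for(race, per_item_timeout)
        if get_task.done():
            items.append(get_task.result())
        else:
            get_task.cancel()
    # surface any error raised inside the producer
    if producer_task.exception():
        raise producer_task.exception()
    return items

async def main():
    queue: asyncio.Queue = asyncio.Queue(maxsize=2)

    async def produce():
        for i in range(5):
            await queue.put(i)

    task = asyncio.create_task(produce())
    print(await consume(queue, task))  # [0, 1, 2, 3, 4]

asyncio.run(main())
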
diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 65e90ee6f..5a850189b 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -5,11 +5,19 @@ from google.cloud.bigtable_v2 import ReadRowsResponse +from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk, StateMachine from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore def parse_readrows_acceptance_tests(): dirname = os.path.dirname(__file__) @@ -41,7 +49,7 @@ def extract_results_from_row(row: Row): "test_case", parse_readrows_acceptance_tests(), ids=lambda t: t.description ) @pytest.mark.asyncio -async def test_scenario(test_case: ReadRowsTest): +async def test_row_merger_scenario(test_case: ReadRowsTest): async def _scenerio_stream(): for chunk in test_case.chunks: yield ReadRowsResponse(chunks=[chunk]) @@ -67,6 +75,42 @@ async def _scenerio_stream(): for expected, actual in zip_longest(test_case.results, results): assert actual == expected +@pytest.mark.parametrize( + "test_case", parse_readrows_acceptance_tests(), ids=lambda t: t.description +) +@pytest.mark.asyncio +async def test_read_rows_scenario(test_case: ReadRowsTest): + async def _make_gapic_stream(chunk_list: list[ReadRowsResponse]): + from google.cloud.bigtable_v2 import ReadRowsResponse + async def inner(): + for chunk in chunk_list: + yield ReadRowsResponse(chunks=[chunk]) + return inner() + try: + client = BigtableDataClient() + table = client.get_table("instance", "table") + results = [] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(test_case.chunks) + async for row in await table.read_rows_stream(query={}, operation_timeout=0.5): + for cell in row: + cell_result = ReadRowsTest.Result( + row_key=cell.row_key, + family_name=cell.family, + qualifier=cell.column_qualifier, + timestamp_micros=cell.timestamp_micros, + value=cell.value, + label=cell.labels[0] if cell.labels else "", + ) + results.append(cell_result) + except Exception as e: + assert isinstance(e.cause, InvalidChunk) + results.append(ReadRowsTest.Result(error=True)) + finally: + await client.close() + for expected, actual in zip_longest(test_case.results, results): + assert actual == expected + @pytest.mark.asyncio async def test_out_of_order_rows(): From 25af0c0b9d5a8147311306d70d82b2e59381c7cc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 14:47:18 -0700 Subject: [PATCH 213/349] adding tests --- google/cloud/bigtable/client.py | 9 ++-- google/cloud/bigtable/row_merger.py | 7 ++-- tests/unit/test_client_read_rows.py | 64 ++++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 63ce8b0d4..ac1c5dd3c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -342,7 +342,7 @@ async def read_rows_stream( self, query: ReadRowsQuery | dict[str, Any], *, - cache_size: int | None = None, + cache_size: int = 0, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, 
per_request_timeout: int | float | None = None, @@ -359,8 +359,8 @@ async def read_rows_stream( Args: - query: contains details about which rows to return - - cache_size: the number of rows to cache in memory. If None, no limits. - Defaults to None + - cache_size: the number of rows to cache in memory. If less than + or equal to 0, cache is unbounded. Defaults to 0 (unbounded) - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. time is only counted while actively waiting on the network. @@ -384,7 +384,8 @@ async def read_rows_stream( """ request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_path - request["app_profile_id"] = self.app_profile_id + if self.app_profile_id: + request["app_profile_id"] = self.app_profile_id # read_rows smart retries is implemented using a series of generators: # - client.read_rows: outputs raw ReadRowsResponse objects from backend. Has per_request_timeout diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 7ca987151..db3b77b86 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -61,14 +61,13 @@ async def start_row_merge( request: dict[str, Any], client: BigtableAsyncClient, *, - cache_size: int | None = None, + cache_size: int = 0, operation_timeout: float | None = None, per_row_timeout: float | None = None, per_request_timeout: float | None = None, revise_on_retry: bool = True, ) -> AsyncGenerator[Row|RequestStats, None]: - if cache_size is None: - cache_size = 0 + cache_size = max(cache_size, 0) self.request = request # lock in paramters for retryable wrapper partial_retryable = partial( @@ -127,7 +126,7 @@ async def retryable_merge_rows( self.emitted_rows, ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) - cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(cache_size) + cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) state_machine = StateMachine() stream_task = asyncio.create_task( RowMerger._generator_to_cache( diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 58226174f..5b9616a67 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -47,23 +47,77 @@ def _make_chunk(*args, **kwargs): return ReadRowsResponse.CellChunk(*args, **kwargs) -async def _make_gapic_stream(chunk_list: list[ReadRowsResponse]): +async def _make_gapic_stream(chunk_list: list[ReadRowsResponse], sleep_time=0): from google.cloud.bigtable_v2 import ReadRowsResponse async def inner(): for chunk in chunk_list: + if sleep_time: + await asyncio.sleep(sleep_time) yield ReadRowsResponse(chunks=[chunk]) return inner() @pytest.mark.asyncio -async def test_read_rows_stream(): +async def test_read_rows(): client = _make_client() table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [_make_chunk()] + chunks = [_make_chunk(row_key=b"test_1")] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) gen = await table.read_rows_stream(query, operation_timeout=3) - async for row in gen: - print(row) + results = [row async for row in gen] + assert len(results) == 1 + assert results[0].row_key == b"test_1" await client.close() + +@pytest.mark.parametrize("include_app_profile", [True, False]) +@pytest.mark.asyncio +async def 
test_read_rows_query_matches_request(include_app_profile): + from google.cloud.bigtable import RowRange + async with _make_client() as client: + app_profile_id = "app_profile_id" if include_app_profile else None + table = client.get_table("instance", "table", app_profile_id=app_profile_id) + row_keys = [b"test_1", "test_2"] + row_ranges = RowRange('start', 'end') + filter_ = {'test': 'filter'} + limit = 99 + query = ReadRowsQuery(row_keys=row_keys, row_ranges=row_ranges, row_filter=filter_, limit=limit) + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([]) + gen = await table.read_rows_stream(query, operation_timeout=3) + results = [row async for row in gen] + assert len(results) == 0 + call_request = read_rows.call_args_list[0][0][0] + query_dict = query._to_dict() + if include_app_profile: + assert set(call_request.keys()) == set(query_dict.keys()) | {'table_name', 'app_profile_id'} + else: + assert set(call_request.keys()) == set(query_dict.keys()) | {"table_name"} + assert call_request['rows'] == query_dict['rows'] + assert call_request['filter'] == filter_ + assert call_request['rows_limit'] == limit + assert call_request['table_name'] == table.table_path + if include_app_profile: + assert call_request['app_profile_id'] == app_profile_id + + +@pytest.mark.parametrize("input_cache_size, expected_cache_size", + [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)]) +@pytest.mark.asyncio +async def test_read_rows_cache_size(input_cache_size, expected_cache_size): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) + with mock.patch.object(asyncio, "Queue") as queue: + queue.side_effect = asyncio.CancelledError + try: + gen = await table.read_rows_stream(query, operation_timeout=3, cache_size=input_cache_size) + [row async for row in gen] + except asyncio.CancelledError: + pass + queue.assert_called_once_with(maxsize=expected_cache_size) + From 2f7778dd746f02999a6752b2673235780e8410bf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 15:33:01 -0700 Subject: [PATCH 214/349] got operation deadline error working properly --- google/cloud/bigtable/client.py | 32 ++++++++++++++++++++--------- google/cloud/bigtable/row_merger.py | 30 ++++++++++++++------------- tests/unit/test_client_read_rows.py | 16 +++++++++++++++ 3 files changed, 54 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index ac1c5dd3c..ec82a131c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -343,9 +343,9 @@ async def read_rows_stream( query: ReadRowsQuery | dict[str, Any], *, cache_size: int = 0, - operation_timeout: int | float | None = 60, - per_row_timeout: int | float | None = 10, - per_request_timeout: int | float | None = None, + operation_timeout: float = 60, + per_row_timeout: float | None = 10, + per_request_timeout: float | None = None, ) -> ReadRowsIterator: """ Returns an iterator to asynchronously stream back row data. 
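As context for the timeout rework in this patch, a caller-side sketch of how the three budgets accepted by read_rows_stream relate to each other (overall operation vs. gap between emitted rows vs. individual RPC attempt). The instance and table ids are placeholders, and this is an illustrative sketch of the surface as it stands at this point in the series, not code taken from the diffs.

import asyncio
from google.api_core.exceptions import DeadlineExceeded
from google.cloud.bigtable.client import BigtableDataClient
from google.cloud.bigtable.read_rows_query import ReadRowsQuery

async def dump_rows():
    async with BigtableDataClient() as client:
        table = client.get_table("my-instance", "my-table")  # placeholder ids
        try:
            row_gen = await table.read_rows_stream(
                ReadRowsQuery(limit=10),
                operation_timeout=60,     # total budget, shared across retries
                per_row_timeout=10,       # max wait between emitted rows
                per_request_timeout=20,   # deadline for each RPC attempt
            )
            async for row in row_gen:
                print(row.row_key)
        except DeadlineExceeded:
            # raised once operation_timeout is exhausted; retries that failed
            # are chained as a RetryExceptionGroup
            print("read_rows ran out of time")

asyncio.run(dump_rows())
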
@@ -380,8 +380,11 @@ async def read_rows_stream( - DeadlineExceeded: raised after operation timeout will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions from any retries that failed + - GoogleAPIError: raised if the request encounters an unrecoverable error - IdleTimeout: if iterator was abandoned """ + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_path if self.app_profile_id: @@ -392,8 +395,7 @@ async def read_rows_stream( # - RowMerger.merge_row_response_stream: parses chunks into rows # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute - row_merger = RowMerger() - row_merge_gen = await row_merger.start_row_merge( + row_merger = RowMerger( request, self.client._gapic_client, cache_size=cache_size, @@ -401,7 +403,7 @@ async def read_rows_stream( per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout, ) - output_generator = ReadRowsIterator(row_merge_gen) + output_generator = ReadRowsIterator(row_merger) # add idle timeout to clear resources if generator is abandoned await output_generator._start_idle_timer(600) return output_generator @@ -679,8 +681,9 @@ class ReadRowsIterator(AsyncIterable[Row]): User-facing async generator for streaming read_rows responses """ - def __init__(self, stream: RowMerger): - self.stream: RowMerger = stream + def __init__(self, merger: RowMerger): + self.stream = merger.__aiter__() + self.merger : RowMerger = merger self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self.last_raised: Exception | None = None @@ -721,11 +724,20 @@ async def __anext__(self) -> Row: return await self.__anext__() else: return next_item + except core_exceptions.RetryError as e: + # raised by AsyncRetry after operation deadline exceeded + new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {self.merger.operation_timeout:0.1f}s exceeded") + self._finish_with_error(new_exc) + raise new_exc from e except Exception as e: - self.last_raised = e - self._idle_timeout_task.cancel() + self._finish_with_error(e) raise e + def _finish_with_error(self, e:Exception): + self.last_raised = e + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() + #TODO: remove resources on completion class IdleTimeout(core_exceptions.DeadlineExceeded): pass diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index db3b77b86..82a17a43c 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -39,7 +39,7 @@ class InvalidChunk(RuntimeError): """Exception raised to invalid chunk data from back-end.""" -class RowMerger(): +class RowMerger(AsyncIterable[Row]): """ RowMerger handles the logic of merging chunks from a ReadRowsResponse stream into a stream of Row objects. @@ -52,11 +52,7 @@ class RowMerger(): performing retries on stream errors. 
""" - def __init__(self): - self.last_seen_row_key: bytes | None = None - self.emitted_rows: Set[bytes] = set() - - async def start_row_merge( + def __init__( self, request: dict[str, Any], client: BigtableAsyncClient, @@ -66,11 +62,14 @@ async def start_row_merge( per_row_timeout: float | None = None, per_request_timeout: float | None = None, revise_on_retry: bool = True, - ) -> AsyncGenerator[Row|RequestStats, None]: + ): + self.last_seen_row_key: bytes | None = None + self.emitted_rows: Set[bytes] = set() cache_size = max(cache_size, 0) self.request = request + self.operation_timeout = operation_timeout # lock in paramters for retryable wrapper - partial_retryable = partial( + self.partial_retryable = partial( self.retryable_merge_rows, client.read_rows, cache_size, @@ -78,19 +77,22 @@ async def start_row_merge( per_request_timeout, revise_on_retry, ) + + def __aiter__(self) -> AsyncGenerator[Row|RequestStats, None]: retry = retries.AsyncRetry( predicate=retries.if_exception_type( InvalidChunk, core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, + asyncio.TimeoutError, ), - timeout=operation_timeout, + timeout=self.operation_timeout, initial=0.1, multiplier=2, maximum=1, is_generator=True, ) - return retry(partial_retryable)() + return retry(self.partial_retryable)() @staticmethod async def _generator_to_cache( @@ -142,11 +144,8 @@ async def retryable_merge_rows( else: # wait for either the stream to finish, or a new item to enter the cache get_from_cache = asyncio.create_task(cache.get()) - get_from_cache_w_timeout = asyncio.wait_for( - get_from_cache, per_row_timeout - ) first_finish = asyncio.wait( - [stream_task, get_from_cache_w_timeout], + [stream_task, get_from_cache], return_when=asyncio.FIRST_COMPLETED, ) await asyncio.wait_for(first_finish, per_row_timeout) @@ -170,6 +169,9 @@ async def retryable_merge_rows( # stream and cache are complete. 
if there's an exception, raise it if stream_task.exception(): raise cast(Exception, stream_task.exception()) + except asyncio.TimeoutError: + # per_row_timeout from asyncio.wait_for + raise core_exceptions.DeadlineExceeded("per_row_timeout of {per_row_timeout:0.1f}s exceeded") finally: stream_task.cancel() diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 5b9616a67..728831582 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -121,3 +121,19 @@ async def test_read_rows_cache_size(input_cache_size, expected_cache_size): pass queue.assert_called_once_with(maxsize=expected_cache_size) +@pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) +@pytest.mark.asyncio +async def test_read_rows_operation_timeout(operation_timeout): + from google.api_core import exceptions as core_exceptions + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, sleep_time=1) + gen = await table.read_rows_stream(query, operation_timeout=operation_timeout) + try: + [row async for row in gen] + except core_exceptions.DeadlineExceeded as e: + assert e.message == f"operation_timeout of {operation_timeout:0.1f}s exceeded" + From d6b8e6becc12c5dd8d6d6bd8d52623074daa0884 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 15:46:23 -0700 Subject: [PATCH 215/349] made RowMerger back into an iterable --- google/cloud/bigtable/client.py | 3 +-- google/cloud/bigtable/row_merger.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index ec82a131c..b93394c20 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -682,7 +682,6 @@ class ReadRowsIterator(AsyncIterable[Row]): """ def __init__(self, merger: RowMerger): - self.stream = merger.__aiter__() self.merger : RowMerger = merger self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() @@ -718,7 +717,7 @@ async def __anext__(self) -> Row: raise self.last_raised try: self.last_interaction_time = time.time() - next_item = await self.stream.__anext__() + next_item = await self.merger.__anext__() if isinstance(next_item, RequestStats): self.request_stats = next_item return await self.__anext__() diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 82a17a43c..ef04886f4 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -31,6 +31,7 @@ Set, Any, AsyncIterable, + AsyncIterator, AsyncGenerator, ) @@ -77,8 +78,6 @@ def __init__( per_request_timeout, revise_on_retry, ) - - def __aiter__(self) -> AsyncGenerator[Row|RequestStats, None]: retry = retries.AsyncRetry( predicate=retries.if_exception_type( InvalidChunk, @@ -92,7 +91,13 @@ def __aiter__(self) -> AsyncGenerator[Row|RequestStats, None]: maximum=1, is_generator=True, ) - return retry(self.partial_retryable)() + self.stream = retry(self.partial_retryable)() + + def __aiter__(self) -> AsyncIterator[Row|RequestStats]: + return self + + async def __anext__(self) -> Row | RequestStats: + return await self.stream.__anext__() @staticmethod async def _generator_to_cache( From 3f085a98f8af1facce7d61b898fda352e56002f2 Mon Sep 17 00:00:00 2001 
From: Daniel Sanche Date: Wed, 5 Apr 2023 17:22:28 -0700 Subject: [PATCH 216/349] added test for per-row timeout --- google/cloud/bigtable/client.py | 5 ++++- google/cloud/bigtable/exceptions.py | 7 ++++++- google/cloud/bigtable/row_merger.py | 18 +++++++++++++----- tests/unit/test_client_read_rows.py | 24 ++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b93394c20..70a1b8e54 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -47,6 +47,7 @@ from google.api_core import client_options as client_options_lib from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.exceptions import RetryExceptionGroup if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -726,8 +727,10 @@ async def __anext__(self) -> Row: except core_exceptions.RetryError as e: # raised by AsyncRetry after operation deadline exceeded new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {self.merger.operation_timeout:0.1f}s exceeded") + retry_errors = RetryExceptionGroup(f"{len(self.merger.errors)} failed attempts", self.merger.errors) + new_exc.__cause__ = retry_errors self._finish_with_error(new_exc) - raise new_exc from e + raise new_exc from retry_errors except Exception as e: self._finish_with_error(e) raise e diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 86bfe9247..7f37f438c 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -29,7 +29,12 @@ class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # t """ def __init__(self, message, excs): - raise NotImplementedError() + if is_311_plus: + super().__init__(message, excs) + else: + self.exceptions = excs + revised_message = f"{message} ({len(excs)} sub-exceptions)" + super().__init__(revised_message) class MutationsExceptionGroup(BigtableExceptionGroup): diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ef04886f4..3fc42d84b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -78,20 +78,28 @@ def __init__( per_request_timeout, revise_on_retry, ) + self.retryable_errors = ( + InvalidChunk, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ) retry = retries.AsyncRetry( predicate=retries.if_exception_type( - InvalidChunk, - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - asyncio.TimeoutError, + *self.retryable_errors ), timeout=self.operation_timeout, initial=0.1, multiplier=2, maximum=1, + on_error=self._on_error, is_generator=True, ) self.stream = retry(self.partial_retryable)() + self.errors: List[Exception] = [] + + def _on_error(self, exc: Exception) -> None: + if type(exc) in self.retryable_errors: + self.errors.append(exc) def __aiter__(self) -> AsyncIterator[Row|RequestStats]: return self @@ -176,7 +184,7 @@ async def retryable_merge_rows( raise cast(Exception, stream_task.exception()) except asyncio.TimeoutError: # per_row_timeout from asyncio.wait_for - raise core_exceptions.DeadlineExceeded("per_row_timeout of {per_row_timeout:0.1f}s exceeded") + raise core_exceptions.DeadlineExceeded(f"per_row_timeout of {per_row_timeout:0.1f}s exceeded") finally: stream_task.cancel() diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py 
index 728831582..fc1e7da28 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -137,3 +137,27 @@ async def test_read_rows_operation_timeout(operation_timeout): except core_exceptions.DeadlineExceeded as e: assert e.message == f"operation_timeout of {operation_timeout:0.1f}s exceeded" +@pytest.mark.parametrize("per_row_t, operation_t, expected_num", + [(0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.21, 4)] +) +@pytest.mark.asyncio +async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): + from google.api_core import exceptions as core_exceptions + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a,b: 0): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, sleep_time=5) + gen = await table.read_rows_stream(query, per_row_timeout=per_row_t, operation_timeout=operation_t) + try: + [row async for row in gen] + except core_exceptions.DeadlineExceeded as deadline_exc: + retry_exc = deadline_exc.__cause__ + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + From 6abb9d443d3f96822688784e28ac2ed12b59b681 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 17:28:11 -0700 Subject: [PATCH 217/349] don't attach retry errors if there are none --- google/cloud/bigtable/client.py | 8 +++++--- tests/unit/test_client_read_rows.py | 13 ++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 70a1b8e54..a40a68ca0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -727,10 +727,12 @@ async def __anext__(self) -> Row: except core_exceptions.RetryError as e: # raised by AsyncRetry after operation deadline exceeded new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {self.merger.operation_timeout:0.1f}s exceeded") - retry_errors = RetryExceptionGroup(f"{len(self.merger.errors)} failed attempts", self.merger.errors) - new_exc.__cause__ = retry_errors + source_exc = None + if self.merger.errors: + source_exc = RetryExceptionGroup(f"{len(self.merger.errors)} failed attempts", self.merger.errors) + new_exc.__cause__ = source_exc self._finish_with_error(new_exc) - raise new_exc from retry_errors + raise new_exc from source_exc except Exception as e: self._finish_with_error(e) raise e diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index fc1e7da28..fc3cb48cf 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -138,7 +138,7 @@ async def test_read_rows_operation_timeout(operation_timeout): assert e.message == f"operation_timeout of {operation_timeout:0.1f}s exceeded" @pytest.mark.parametrize("per_row_t, operation_t, expected_num", - [(0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.21, 4)] + [(0.1, 0.01, 0), (0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.21, 4)] ) @pytest.mark.asyncio async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): @@ -156,8 +156,11 @@ 
async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): [row async for row in gen] except core_exceptions.DeadlineExceeded as deadline_exc: retry_exc = deadline_exc.__cause__ - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert sub_exc.message == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + if expected_num == 0: + assert retry_exc is None + else: + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == f"per_row_timeout of {per_row_t:0.1f}s exceeded" From 128320c29e04ca6dd5dcba6d6d70fe04edfd662c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 5 Apr 2023 18:33:34 -0700 Subject: [PATCH 218/349] added tests for per_request_timeout --- tests/unit/test_client_read_rows.py | 40 +++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index fc3cb48cf..7f0060452 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -47,13 +47,16 @@ def _make_chunk(*args, **kwargs): return ReadRowsResponse.CellChunk(*args, **kwargs) -async def _make_gapic_stream(chunk_list: list[ReadRowsResponse], sleep_time=0): +async def _make_gapic_stream(chunk_list: list[ReadRowsResponse.CellChunk|Exception], sleep_time=0): from google.cloud.bigtable_v2 import ReadRowsResponse async def inner(): for chunk in chunk_list: if sleep_time: await asyncio.sleep(sleep_time) - yield ReadRowsResponse(chunks=[chunk]) + if isinstance(chunk, Exception): + raise chunk + else: + yield ReadRowsResponse(chunks=[chunk]) return inner() @@ -143,6 +146,7 @@ async def test_read_rows_operation_timeout(operation_timeout): @pytest.mark.asyncio async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): from google.api_core import exceptions as core_exceptions + from google.cloud.bigtable.exceptions import RetryExceptionGroup # mocking uniform ensures there are no sleeps between retries with mock.patch("random.uniform", side_effect=lambda a,b: 0): async with _make_client() as client: @@ -159,8 +163,40 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): if expected_num == 0: assert retry_exc is None else: + assert type(retry_exc) == RetryExceptionGroup assert f"{expected_num} failed attempts" in str(retry_exc) assert len(retry_exc.exceptions) == expected_num for sub_exc in retry_exc.exceptions: assert sub_exc.message == f"per_row_timeout of {per_row_t:0.1f}s exceeded" +@pytest.mark.parametrize("per_request_t, operation_t, expected_num", + [(0.1, 0.01, 0), (0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.21, 4)] +) +@pytest.mark.asyncio +async def test_read_rows_per_request_timeout(per_request_t, operation_t, expected_num): + from google.api_core import exceptions as core_exceptions + from google.cloud.bigtable.exceptions import RetryExceptionGroup + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a,b: 0): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [core_exceptions.DeadlineExceeded("mock deadline")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: 
_make_gapic_stream(chunks, sleep_time=per_request_t) + gen = await table.read_rows_stream(query, operation_timeout=operation_t, per_request_timeout=per_request_t) + try: + [row async for row in gen] + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == f"mock deadline" + assert read_rows.call_count == expected_num+1 + called_kwargs = read_rows.call_args[1] + assert called_kwargs["timeout"] == per_request_t From a048536debe3412f5631464a5d52d1f71e4d704c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 10:14:25 -0700 Subject: [PATCH 219/349] added idle timeout test --- google/cloud/bigtable/client.py | 7 +++---- google/cloud/bigtable/exceptions.py | 3 +++ tests/unit/test_client_read_rows.py | 26 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a40a68ca0..c1b48f1d4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -48,6 +48,7 @@ from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable.exceptions import RetryExceptionGroup +from google.cloud.bigtable.exceptions import IdleTimeout if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -708,7 +709,7 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): and self.last_raised is None ): # idle timeout has expired - self.last_raised = IdleTimeout("idle timeout expired") + self._finish_with_error(IdleTimeout("idle timeout expired")) def __aiter__(self): return self @@ -741,7 +742,5 @@ def _finish_with_error(self, e:Exception): self.last_raised = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() + self._idle_timeout_task = None #TODO: remove resources on completion - -class IdleTimeout(core_exceptions.DeadlineExceeded): - pass diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 7f37f438c..720b2e413 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -15,9 +15,12 @@ import sys +from google.api_core import exceptions as core_exceptions is_311_plus = sys.version_info >= (3, 11) +class IdleTimeout(core_exceptions.DeadlineExceeded): + pass class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 7f0060452..9f3a74688 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -200,3 +200,29 @@ async def test_read_rows_per_request_timeout(per_request_t, operation_t, expecte assert read_rows.call_count == expected_num+1 called_kwargs = read_rows.call_args[1] assert called_kwargs["timeout"] == per_request_t + +@pytest.mark.asyncio +async def test_read_rows_idle_timeout(): + from google.cloud.bigtable.client import ReadRowsIterator + from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.bigtable.exceptions import IdleTimeout + chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] + with 
mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([_make_chunk(row_key=b"test_1")]) + with mock.patch.object(ReadRowsIterator, "_start_idle_timer") as start_idle_timer: + client = _make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + gen = await table.read_rows_stream(query) + # should start idle timer on creation + start_idle_timer.assert_called_once() + # start idle timer with our own value + await gen._start_idle_timer(0.1) + # should timeout after being abandoned + await gen.__anext__() + await asyncio.sleep(0.2) + with pytest.raises(IdleTimeout) as e: + await gen.__anext__() + assert e.value.message == "idle timeout expired" + await client.close() + From 371dd648cb2230fa08fa70868de000f57b052ec5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 10:24:13 -0700 Subject: [PATCH 220/349] remove row merger after error --- google/cloud/bigtable/client.py | 59 +++++++++++++++-------------- tests/unit/test_client_read_rows.py | 8 +++- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c1b48f1d4..fbad6700f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -684,10 +684,9 @@ class ReadRowsIterator(AsyncIterable[Row]): """ def __init__(self, merger: RowMerger): - self.merger : RowMerger = merger + self._merger_or_error : RowMerger | Exception = merger self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() - self.last_raised: Exception | None = None self._idle_timeout_task: asyncio.Task[None] | None = None async def _start_idle_timer(self, idle_timeout: float): @@ -700,13 +699,16 @@ async def _start_idle_timer(self, idle_timeout: float): if sys.version_info >= (3, 8): self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" + def active(self): + return isinstance(self._merger_or_error, RowMerger) + async def _idle_timeout_coroutine(self, idle_timeout: float): - while self.last_raised is None: + while self.active(): next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) if ( self.last_interaction_time + idle_timeout < time.time() - and self.last_raised is None + and self.active() ): # idle timeout has expired self._finish_with_error(IdleTimeout("idle timeout expired")) @@ -715,32 +717,33 @@ def __aiter__(self): return self async def __anext__(self) -> Row: - if self.last_raised: - raise self.last_raised - try: - self.last_interaction_time = time.time() - next_item = await self.merger.__anext__() - if isinstance(next_item, RequestStats): - self.request_stats = next_item - return await self.__anext__() - else: - return next_item - except core_exceptions.RetryError as e: - # raised by AsyncRetry after operation deadline exceeded - new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {self.merger.operation_timeout:0.1f}s exceeded") - source_exc = None - if self.merger.errors: - source_exc = RetryExceptionGroup(f"{len(self.merger.errors)} failed attempts", self.merger.errors) - new_exc.__cause__ = source_exc - self._finish_with_error(new_exc) - raise new_exc from source_exc - except Exception as e: - self._finish_with_error(e) - raise e + if isinstance(self._merger_or_error, Exception): + raise self._merger_or_error + else: + merger = cast(RowMerger, self._merger_or_error) + try: + self.last_interaction_time = time.time() + next_item = await 
merger.__anext__() + if isinstance(next_item, RequestStats): + self.request_stats = next_item + return await self.__anext__() + else: + return next_item + except core_exceptions.RetryError: + # raised by AsyncRetry after operation deadline exceeded + new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded") + source_exc = None + if merger.errors: + source_exc = RetryExceptionGroup(f"{len(merger.errors)} failed attempts", merger.errors) + new_exc.__cause__ = source_exc + self._finish_with_error(new_exc) + raise new_exc from source_exc + except Exception as e: + self._finish_with_error(e) + raise e def _finish_with_error(self, e:Exception): - self.last_raised = e + self._merger_or_error = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() self._idle_timeout_task = None - #TODO: remove resources on completion diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 9f3a74688..c5ab55294 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -208,7 +208,7 @@ async def test_read_rows_idle_timeout(): from google.cloud.bigtable.exceptions import IdleTimeout chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([_make_chunk(row_key=b"test_1")]) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) with mock.patch.object(ReadRowsIterator, "_start_idle_timer") as start_idle_timer: client = _make_client() table = client.get_table("instance", "table") @@ -221,8 +221,12 @@ async def test_read_rows_idle_timeout(): # should timeout after being abandoned await gen.__anext__() await asyncio.sleep(0.2) + # generator should be expired + assert not gen.active() + assert type(gen._merger_or_error) == IdleTimeout + assert gen._idle_timeout_task is None + await client.close() with pytest.raises(IdleTimeout) as e: await gen.__anext__() assert e.value.message == "idle timeout expired" - await client.close() From ebbaa1eb1e79f1d2f788ee8d16e00f5cb28d1e85 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 10:49:38 -0700 Subject: [PATCH 221/349] reorganized retryable_merge_rows --- google/cloud/bigtable/row_merger.py | 43 +++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 3fc42d84b..84bfaf30f 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -143,29 +143,20 @@ async def retryable_merge_rows( new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) state_machine = StateMachine() - stream_task = asyncio.create_task( - RowMerger._generator_to_cache( - cache, RowMerger.merge_row_response_stream(new_gapic_stream, state_machine) - ) - ) try: + stream_task = asyncio.create_task( + RowMerger._generator_to_cache( + cache, RowMerger.merge_row_response_stream(new_gapic_stream, state_machine) + ) + ) + get_from_cache_task = asyncio.create_task(cache.get()) + # sleep to allow other tasks to run + await asyncio.sleep(0) # read from state machine and push into cache - while not stream_task.done() or not cache.empty(): - new_item = None - if not cache.empty(): - new_item = await cache.get() - else: - # wait for either the stream to 
finish, or a new item to enter the cache - get_from_cache = asyncio.create_task(cache.get()) - first_finish = asyncio.wait( - [stream_task, get_from_cache], - return_when=asyncio.FIRST_COMPLETED, - ) - await asyncio.wait_for(first_finish, per_row_timeout) - if get_from_cache.done(): - new_item = get_from_cache.result() - # if we found an item this loop, yield it - if new_item is not None: + # when finished, stream will be done, cache will be empty, but get_from_cache_task will still be waiting + while not stream_task.done() or not cache.empty() or get_from_cache_task.done(): + if get_from_cache_task.done(): + new_item = get_from_cache_task.result() # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item @@ -179,6 +170,15 @@ async def retryable_merge_rows( if not isinstance(new_item, _LastScannedRow): self.emitted_rows.add(new_item.row_key) yield new_item + # start new task for cache + get_from_cache_task = asyncio.create_task(cache.get()) + else: + # wait for either the stream to finish, or a new item to enter the cache + first_finish = asyncio.wait( + [stream_task, get_from_cache_task], + return_when=asyncio.FIRST_COMPLETED, + ) + await asyncio.wait_for(first_finish, per_row_timeout) # stream and cache are complete. if there's an exception, raise it if stream_task.exception(): raise cast(Exception, stream_task.exception()) @@ -187,6 +187,7 @@ async def retryable_merge_rows( raise core_exceptions.DeadlineExceeded(f"per_row_timeout of {per_row_timeout:0.1f}s exceeded") finally: stream_task.cancel() + get_from_cache_task.cancel() @staticmethod def _revise_request_rowset( From 2a3e379e40d2c395d2264f35acb62bf7f8a7e604 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 11:19:11 -0700 Subject: [PATCH 222/349] improved resource clean up on retries and expiration --- google/cloud/bigtable/client.py | 13 ++++++++----- google/cloud/bigtable/row_merger.py | 16 ++++++++++++++-- tests/unit/test_client_read_rows.py | 28 ++++++++++++++++------------ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index fbad6700f..bdf8eca98 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -711,7 +711,7 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): and self.active() ): # idle timeout has expired - self._finish_with_error(IdleTimeout("idle timeout expired")) + await self._finish_with_error(IdleTimeout("idle timeout expired")) def __aiter__(self): return self @@ -736,14 +736,17 @@ async def __anext__(self) -> Row: if merger.errors: source_exc = RetryExceptionGroup(f"{len(merger.errors)} failed attempts", merger.errors) new_exc.__cause__ = source_exc - self._finish_with_error(new_exc) + await self._finish_with_error(new_exc) raise new_exc from source_exc except Exception as e: - self._finish_with_error(e) + await self._finish_with_error(e) raise e - def _finish_with_error(self, e:Exception): - self._merger_or_error = e + async def _finish_with_error(self, e:Exception): + if isinstance(self._merger_or_error, RowMerger): + await self._merger_or_error.aclose() + del self._merger_or_error + self._merger_or_error = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() self._idle_timeout_task = None diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 84bfaf30f..0964a366e 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -94,7 
+94,7 @@ def __init__( on_error=self._on_error, is_generator=True, ) - self.stream = retry(self.partial_retryable)() + self.stream: AsyncGenerator[Row|RequestStats, None] | None = retry(self.partial_retryable)() self.errors: List[Exception] = [] def _on_error(self, exc: Exception) -> None: @@ -105,7 +105,19 @@ def __aiter__(self) -> AsyncIterator[Row|RequestStats]: return self async def __anext__(self) -> Row | RequestStats: - return await self.stream.__anext__() + if isinstance(self.stream, AsyncGenerator): + return await self.stream.__anext__() + else: + raise asyncio.InvalidStateError("stream is closed") + + async def aclose(self): + # release resources + if isinstance(self.stream, AsyncGenerator): + await self.stream.aclose() + del self.stream + self.stream = None + self.emitted_rows.clear() + self.last_seen_row_key = None @staticmethod async def _generator_to_cache( diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index c5ab55294..26584bc9c 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -206,6 +206,7 @@ async def test_read_rows_idle_timeout(): from google.cloud.bigtable.client import ReadRowsIterator from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable.exceptions import IdleTimeout + from google.cloud.bigtable.row_merger import RowMerger chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) @@ -216,17 +217,20 @@ async def test_read_rows_idle_timeout(): gen = await table.read_rows_stream(query) # should start idle timer on creation start_idle_timer.assert_called_once() - # start idle timer with our own value - await gen._start_idle_timer(0.1) - # should timeout after being abandoned - await gen.__anext__() - await asyncio.sleep(0.2) - # generator should be expired - assert not gen.active() - assert type(gen._merger_or_error) == IdleTimeout - assert gen._idle_timeout_task is None - await client.close() - with pytest.raises(IdleTimeout) as e: + with mock.patch.object(RowMerger, "aclose", AsyncMock()) as aclose: + # start idle timer with our own value + await gen._start_idle_timer(0.1) + # should timeout after being abandoned await gen.__anext__() - assert e.value.message == "idle timeout expired" + await asyncio.sleep(0.2) + # generator should be expired + assert not gen.active() + assert type(gen._merger_or_error) == IdleTimeout + assert gen._idle_timeout_task is None + await client.close() + with pytest.raises(IdleTimeout) as e: + await gen.__anext__() + assert e.value.message == "idle timeout expired" + aclose.assert_called_once() + aclose.assert_awaited() From 2e50c51671d4cbe11b90b1347eb34acddf6b3c51 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 11:53:17 -0700 Subject: [PATCH 223/349] added tests for request stats --- google/cloud/bigtable/row_merger.py | 3 +- tests/unit/test_client_read_rows.py | 50 ++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 0964a366e..a33331562 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -184,6 +184,7 @@ async def retryable_merge_rows( yield new_item # start new task for cache get_from_cache_task = asyncio.create_task(cache.get()) + asyncio.sleep(0) else: # 
wait for either the stream to finish, or a new item to enter the cache first_finish = asyncio.wait( @@ -258,7 +259,7 @@ async def merge_row_response_stream( yield complete_row # yield request stats if present if row_response.request_stats: - yield response_pb.request_stats + yield row_response.request_stats if not state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 26584bc9c..0811a0ce3 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -19,6 +19,7 @@ from google.cloud.bigtable_v2.types import ReadRowsResponse from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable_v2.types import RequestStats # try/except added for compatibility with python < 3.8 try: @@ -34,6 +35,21 @@ def _make_client(*args, **kwargs): return BigtableDataClient(*args, **kwargs) +def _make_stats(): + from google.cloud.bigtable_v2.types import RequestStats + from google.cloud.bigtable_v2.types import FullReadStatsView + from google.cloud.bigtable_v2.types import ReadIterationStats + + return RequestStats( + full_read_stats_view=FullReadStatsView( + read_iteration_stats=ReadIterationStats( + rows_seen_count=1, + rows_returned_count=2, + cells_seen_count=3, + cells_returned_count=4, + ) + ) + ) def _make_chunk(*args, **kwargs): from google.cloud.bigtable_v2 import ReadRowsResponse @@ -47,7 +63,11 @@ def _make_chunk(*args, **kwargs): return ReadRowsResponse.CellChunk(*args, **kwargs) -async def _make_gapic_stream(chunk_list: list[ReadRowsResponse.CellChunk|Exception], sleep_time=0): +async def _make_gapic_stream( + chunk_list: list[ReadRowsResponse.CellChunk|Exception], + request_stats: RequestStats | None = None, + sleep_time=0 +): from google.cloud.bigtable_v2 import ReadRowsResponse async def inner(): for chunk in chunk_list: @@ -57,6 +77,8 @@ async def inner(): raise chunk else: yield ReadRowsResponse(chunks=[chunk]) + if request_stats: + yield ReadRowsResponse(request_stats=request_stats) return inner() @@ -234,3 +256,29 @@ async def test_read_rows_idle_timeout(): aclose.assert_called_once() aclose.assert_awaited() + +@pytest.mark.asyncio +async def test_read_rows_request_stats(): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1")] + stats = _make_stats() + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, request_stats=stats) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats == stats + +@pytest.mark.asyncio +async def test_read_rows_request_stats_missing(): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, request_stats=None) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats is None + From 0b63b2b0ad78439c033b70509ffda727a1734366 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 12:45:16 -0700 Subject: [PATCH 224/349] added tests for exceptions --- 
google/cloud/bigtable/row_merger.py | 12 ++++---- tests/unit/test_client_read_rows.py | 43 +++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index a33331562..214c1940f 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -78,15 +78,13 @@ def __init__( per_request_timeout, revise_on_retry, ) - self.retryable_errors = ( + self._error_predicate = retries.if_exception_type( InvalidChunk, - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, + core_exceptions.ServerError, + core_exceptions.TooManyRequests, ) retry = retries.AsyncRetry( - predicate=retries.if_exception_type( - *self.retryable_errors - ), + predicate=self._error_predicate, timeout=self.operation_timeout, initial=0.1, multiplier=2, @@ -98,7 +96,7 @@ def __init__( self.errors: List[Exception] = [] def _on_error(self, exc: Exception) -> None: - if type(exc) in self.retryable_errors: + if self._error_predicate(exc): self.errors.append(exc) def __aiter__(self) -> AsyncIterator[Row|RequestStats]: diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 0811a0ce3..e88f60330 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -20,6 +20,8 @@ from google.cloud.bigtable_v2.types import ReadRowsResponse from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable_v2.types import RequestStats +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable.row_merger import InvalidChunk # try/except added for compatibility with python < 3.8 try: @@ -149,7 +151,6 @@ async def test_read_rows_cache_size(input_cache_size, expected_cache_size): @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) @pytest.mark.asyncio async def test_read_rows_operation_timeout(operation_timeout): - from google.api_core import exceptions as core_exceptions async with _make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() @@ -167,7 +168,6 @@ async def test_read_rows_operation_timeout(operation_timeout): ) @pytest.mark.asyncio async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): - from google.api_core import exceptions as core_exceptions from google.cloud.bigtable.exceptions import RetryExceptionGroup # mocking uniform ensures there are no sleeps between retries with mock.patch("random.uniform", side_effect=lambda a,b: 0): @@ -196,7 +196,6 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): ) @pytest.mark.asyncio async def test_read_rows_per_request_timeout(per_request_t, operation_t, expected_num): - from google.api_core import exceptions as core_exceptions from google.cloud.bigtable.exceptions import RetryExceptionGroup # mocking uniform ensures there are no sleeps between retries with mock.patch("random.uniform", side_effect=lambda a,b: 0): @@ -256,6 +255,44 @@ async def test_read_rows_idle_timeout(): aclose.assert_called_once() aclose.assert_awaited() +@pytest.mark.parametrize("exc_type", + [InvalidChunk, core_exceptions.DeadlineExceeded, core_exceptions.InternalServerError, + core_exceptions.ServiceUnavailable, core_exceptions.TooManyRequests, core_exceptions.ResourceExhausted] +) +@pytest.mark.asyncio +async def test_read_rows_retryable_error(exc_type): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = 
ReadRowsQuery() + expected_error = exc_type("mock error") + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([expected_error]) + gen = await table.read_rows_stream(query, operation_timeout=0.1) + try: + [row async for row in gen] + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + root_cause = retry_exc.exceptions[0] + assert type(root_cause) == exc_type + assert root_cause == expected_error + +@pytest.mark.parametrize("exc_type", + [core_exceptions.Cancelled, core_exceptions.PreconditionFailed, core_exceptions.NotFound, + core_exceptions.PermissionDenied, core_exceptions.Conflict, core_exceptions.Aborted]) +@pytest.mark.asyncio +async def test_read_rows_non_retryable_error(exc_type): + async with _make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + expected_error = exc_type("mock error") + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([expected_error]) + gen = await table.read_rows_stream(query, operation_timeout=0.1) + try: + [row async for row in gen] + except exc_type as e: + assert e == expected_error + @pytest.mark.asyncio async def test_read_rows_request_stats(): From de102bbb8ef99d97ce31216a24bc4a27e615def8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 12:51:24 -0700 Subject: [PATCH 225/349] clean up on_error --- google/cloud/bigtable/row_merger.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index 214c1940f..c453c153b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -78,27 +78,26 @@ def __init__( per_request_timeout, revise_on_retry, ) - self._error_predicate = retries.if_exception_type( + predicate = retries.if_exception_type( InvalidChunk, core_exceptions.ServerError, core_exceptions.TooManyRequests, ) + def on_error_fn(exc): + if predicate(exc): + self.errors.append(exc) retry = retries.AsyncRetry( - predicate=self._error_predicate, + predicate=predicate, timeout=self.operation_timeout, initial=0.1, multiplier=2, maximum=1, - on_error=self._on_error, + on_error=on_error_fn, is_generator=True, ) self.stream: AsyncGenerator[Row|RequestStats, None] | None = retry(self.partial_retryable)() self.errors: List[Exception] = [] - def _on_error(self, exc: Exception) -> None: - if self._error_predicate(exc): - self.errors.append(exc) - def __aiter__(self) -> AsyncIterator[Row|RequestStats]: return self From bbdb8e6cd776f01a995aceec11efbc74a1a24b36 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 12:51:34 -0700 Subject: [PATCH 226/349] await sleep --- google/cloud/bigtable/row_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index c453c153b..dddc9f86b 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -181,7 +181,7 @@ async def retryable_merge_rows( yield new_item # start new task for cache get_from_cache_task = asyncio.create_task(cache.get()) - asyncio.sleep(0) + await asyncio.sleep(0) else: # wait for either the stream to finish, or a new item to enter the cache first_finish = asyncio.wait( From 83472dce445ffd28852ff95aa0f23be353003da2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: 
Thu, 6 Apr 2023 13:09:07 -0700 Subject: [PATCH 227/349] got tests working --- noxfile.py | 2 +- tests/unit/test_read_rows_acceptance.py | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index ed69bf85e..041d474fd 100644 --- a/noxfile.py +++ b/noxfile.py @@ -178,7 +178,7 @@ def install_unittest_dependencies(session, *constraints): session.install("-e", f".[{','.join(extras)}]", *constraints) else: session.install("-e", ".", *constraints) - + session.install("-e", "./python-api-core", *constraints) def default(session): # Install all test dependencies, then install this package in-place. diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 5a850189b..b09c7f4b6 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + import os from itertools import zip_longest @@ -92,7 +107,7 @@ async def inner(): results = [] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(test_case.chunks) - async for row in await table.read_rows_stream(query={}, operation_timeout=0.5): + async for row in await table.read_rows_stream(query={}, operation_timeout=0.02): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, @@ -104,7 +119,9 @@ async def inner(): ) results.append(cell_result) except Exception as e: - assert isinstance(e.cause, InvalidChunk) + retry_exc = e.__cause__ + root_exc = retry_exc.exceptions[0] + assert isinstance(root_exc, InvalidChunk) results.append(ReadRowsTest.Result(error=True)) finally: await client.close() From bef40bdd5a6b5d31063a767afda0d39fd923af00 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 13:10:38 -0700 Subject: [PATCH 228/349] updated api-core --- python-api-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-api-core b/python-api-core index 985b13a5e..6cb3e2dc6 160000 --- a/python-api-core +++ b/python-api-core @@ -1 +1 @@ -Subproject commit 985b13a5e633958204d4fa60b0c0d840fc0351f8 +Subproject commit 6cb3e2dc6edac2b4b4c22496a3b507ceed3c5a24 From 534005aeaec27d64f1f47ec5f875fba274b885ec Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 13:15:25 -0700 Subject: [PATCH 229/349] ran blacken --- google/cloud/bigtable/client.py | 12 +- google/cloud/bigtable/exceptions.py | 2 + google/cloud/bigtable/row_merger.py | 30 +++- noxfile.py | 1 + tests/unit/test_client_read_rows.py | 183 ++++++++++++++++++------ tests/unit/test_read_rows_acceptance.py | 13 +- 6 files changed, 183 insertions(+), 58 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 92cd57db1..a2b114d20 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -686,7 +686,7 @@ class 
ReadRowsIterator(AsyncIterable[Row]): """ def __init__(self, merger: RowMerger): - self._merger_or_error : RowMerger | Exception = merger + self._merger_or_error: RowMerger | Exception = merger self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self._idle_timeout_task: asyncio.Task[None] | None = None @@ -733,10 +733,14 @@ async def __anext__(self) -> Row: return next_item except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded - new_exc = core_exceptions.DeadlineExceeded(f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded") + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded" + ) source_exc = None if merger.errors: - source_exc = RetryExceptionGroup(f"{len(merger.errors)} failed attempts", merger.errors) + source_exc = RetryExceptionGroup( + f"{len(merger.errors)} failed attempts", merger.errors + ) new_exc.__cause__ = source_exc await self._finish_with_error(new_exc) raise new_exc from source_exc @@ -744,7 +748,7 @@ async def __anext__(self) -> Row: await self._finish_with_error(e) raise e - async def _finish_with_error(self, e:Exception): + async def _finish_with_error(self, e: Exception): if isinstance(self._merger_or_error, RowMerger): await self._merger_or_error.aclose() del self._merger_or_error diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 720b2e413..20b375ead 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -19,9 +19,11 @@ is_311_plus = sys.version_info >= (3, 11) + class IdleTimeout(core_exceptions.DeadlineExceeded): pass + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index dddc9f86b..c627a4702 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -83,9 +83,11 @@ def __init__( core_exceptions.ServerError, core_exceptions.TooManyRequests, ) + def on_error_fn(exc): if predicate(exc): self.errors.append(exc) + retry = retries.AsyncRetry( predicate=predicate, timeout=self.operation_timeout, @@ -95,10 +97,12 @@ def on_error_fn(exc): on_error=on_error_fn, is_generator=True, ) - self.stream: AsyncGenerator[Row|RequestStats, None] | None = retry(self.partial_retryable)() + self.stream: AsyncGenerator[Row | RequestStats, None] | None = retry( + self.partial_retryable + )() self.errors: List[Exception] = [] - def __aiter__(self) -> AsyncIterator[Row|RequestStats]: + def __aiter__(self) -> AsyncIterator[Row | RequestStats]: return self async def __anext__(self) -> Row | RequestStats: @@ -127,7 +131,12 @@ async def _generator_to_cache( await cache.put(item) async def retryable_merge_rows( - self, gapic_fn, cache_size, per_row_timeout, per_request_timeout, revise_on_retry + self, + gapic_fn, + cache_size, + per_row_timeout, + per_request_timeout, + revise_on_retry, ) -> AsyncGenerator[Row | RequestStats, None]: """ Retryable wrapper for merge_rows. 
This function is called each time @@ -155,7 +164,10 @@ async def retryable_merge_rows( try: stream_task = asyncio.create_task( RowMerger._generator_to_cache( - cache, RowMerger.merge_row_response_stream(new_gapic_stream, state_machine) + cache, + RowMerger.merge_row_response_stream( + new_gapic_stream, state_machine + ), ) ) get_from_cache_task = asyncio.create_task(cache.get()) @@ -163,7 +175,11 @@ async def retryable_merge_rows( await asyncio.sleep(0) # read from state machine and push into cache # when finished, stream will be done, cache will be empty, but get_from_cache_task will still be waiting - while not stream_task.done() or not cache.empty() or get_from_cache_task.done(): + while ( + not stream_task.done() + or not cache.empty() + or get_from_cache_task.done() + ): if get_from_cache_task.done(): new_item = get_from_cache_task.result() # don't yield rows that have already been emitted @@ -194,7 +210,9 @@ async def retryable_merge_rows( raise cast(Exception, stream_task.exception()) except asyncio.TimeoutError: # per_row_timeout from asyncio.wait_for - raise core_exceptions.DeadlineExceeded(f"per_row_timeout of {per_row_timeout:0.1f}s exceeded") + raise core_exceptions.DeadlineExceeded( + f"per_row_timeout of {per_row_timeout:0.1f}s exceeded" + ) finally: stream_task.cancel() get_from_cache_task.cancel() diff --git a/noxfile.py b/noxfile.py index 041d474fd..6d4b9317c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -180,6 +180,7 @@ def install_unittest_dependencies(session, *constraints): session.install("-e", ".", *constraints) session.install("-e", "./python-api-core", *constraints) + def default(session): # Install all test dependencies, then install this package in-place. diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index e88f60330..812c75844 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -37,6 +37,7 @@ def _make_client(*args, **kwargs): return BigtableDataClient(*args, **kwargs) + def _make_stats(): from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable_v2.types import FullReadStatsView @@ -53,6 +54,7 @@ def _make_stats(): ) ) + def _make_chunk(*args, **kwargs): from google.cloud.bigtable_v2 import ReadRowsResponse @@ -66,11 +68,12 @@ def _make_chunk(*args, **kwargs): async def _make_gapic_stream( - chunk_list: list[ReadRowsResponse.CellChunk|Exception], - request_stats: RequestStats | None = None, - sleep_time=0 + chunk_list: list[ReadRowsResponse.CellChunk | Exception], + request_stats: RequestStats | None = None, + sleep_time=0, ): from google.cloud.bigtable_v2 import ReadRowsResponse + async def inner(): for chunk in chunk_list: if sleep_time: @@ -81,6 +84,7 @@ async def inner(): yield ReadRowsResponse(chunks=[chunk]) if request_stats: yield ReadRowsResponse(request_stats=request_stats) + return inner() @@ -98,18 +102,22 @@ async def test_read_rows(): assert results[0].row_key == b"test_1" await client.close() + @pytest.mark.parametrize("include_app_profile", [True, False]) @pytest.mark.asyncio async def test_read_rows_query_matches_request(include_app_profile): from google.cloud.bigtable import RowRange + async with _make_client() as client: app_profile_id = "app_profile_id" if include_app_profile else None table = client.get_table("instance", "table", app_profile_id=app_profile_id) row_keys = [b"test_1", "test_2"] - row_ranges = RowRange('start', 'end') - filter_ = {'test': 'filter'} + row_ranges = RowRange("start", "end") + filter_ = {"test": "filter"} limit 
= 99 - query = ReadRowsQuery(row_keys=row_keys, row_ranges=row_ranges, row_filter=filter_, limit=limit) + query = ReadRowsQuery( + row_keys=row_keys, row_ranges=row_ranges, row_filter=filter_, limit=limit + ) with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([]) gen = await table.read_rows_stream(query, operation_timeout=3) @@ -118,19 +126,26 @@ async def test_read_rows_query_matches_request(include_app_profile): call_request = read_rows.call_args_list[0][0][0] query_dict = query._to_dict() if include_app_profile: - assert set(call_request.keys()) == set(query_dict.keys()) | {'table_name', 'app_profile_id'} + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name", + "app_profile_id", + } else: - assert set(call_request.keys()) == set(query_dict.keys()) | {"table_name"} - assert call_request['rows'] == query_dict['rows'] - assert call_request['filter'] == filter_ - assert call_request['rows_limit'] == limit - assert call_request['table_name'] == table.table_path + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name" + } + assert call_request["rows"] == query_dict["rows"] + assert call_request["filter"] == filter_ + assert call_request["rows_limit"] == limit + assert call_request["table_name"] == table.table_path if include_app_profile: - assert call_request['app_profile_id'] == app_profile_id + assert call_request["app_profile_id"] == app_profile_id -@pytest.mark.parametrize("input_cache_size, expected_cache_size", - [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)]) +@pytest.mark.parametrize( + "input_cache_size, expected_cache_size", + [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], +) @pytest.mark.asyncio async def test_read_rows_cache_size(input_cache_size, expected_cache_size): async with _make_client() as client: @@ -142,12 +157,15 @@ async def test_read_rows_cache_size(input_cache_size, expected_cache_size): with mock.patch.object(asyncio, "Queue") as queue: queue.side_effect = asyncio.CancelledError try: - gen = await table.read_rows_stream(query, operation_timeout=3, cache_size=input_cache_size) + gen = await table.read_rows_stream( + query, operation_timeout=3, cache_size=input_cache_size + ) [row async for row in gen] except asyncio.CancelledError: pass queue.assert_called_once_with(maxsize=expected_cache_size) + @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) @pytest.mark.asyncio async def test_read_rows_operation_timeout(operation_timeout): @@ -156,28 +174,50 @@ async def test_read_rows_operation_timeout(operation_timeout): query = ReadRowsQuery() chunks = [_make_chunk(row_key=b"test_1")] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, sleep_time=1) - gen = await table.read_rows_stream(query, operation_timeout=operation_timeout) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, sleep_time=1 + ) + gen = await table.read_rows_stream( + query, operation_timeout=operation_timeout + ) try: [row async for row in gen] except core_exceptions.DeadlineExceeded as e: - assert e.message == f"operation_timeout of {operation_timeout:0.1f}s exceeded" + assert ( + e.message + == f"operation_timeout of {operation_timeout:0.1f}s exceeded" + ) -@pytest.mark.parametrize("per_row_t, operation_t, expected_num", - [(0.1, 0.01, 0), (0.01, 0.015, 1), (0.05, 0.54, 10), 
(0.05, 0.14, 2), (0.05, 0.21, 4)] + +@pytest.mark.parametrize( + "per_row_t, operation_t, expected_num", + [ + (0.1, 0.01, 0), + (0.01, 0.015, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.21, 4), + ], ) @pytest.mark.asyncio async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): from google.cloud.bigtable.exceptions import RetryExceptionGroup + # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a,b: 0): + with mock.patch("random.uniform", side_effect=lambda a, b: 0): async with _make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() chunks = [_make_chunk(row_key=b"test_1")] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, sleep_time=5) - gen = await table.read_rows_stream(query, per_row_timeout=per_row_t, operation_timeout=operation_t) + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, sleep_time=5 + ) + gen = await table.read_rows_stream( + query, per_row_timeout=per_row_t, operation_timeout=operation_t + ) try: [row async for row in gen] except core_exceptions.DeadlineExceeded as deadline_exc: @@ -189,23 +229,43 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): assert f"{expected_num} failed attempts" in str(retry_exc) assert len(retry_exc.exceptions) == expected_num for sub_exc in retry_exc.exceptions: - assert sub_exc.message == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + assert ( + sub_exc.message + == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + ) + -@pytest.mark.parametrize("per_request_t, operation_t, expected_num", - [(0.1, 0.01, 0), (0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.21, 4)] +@pytest.mark.parametrize( + "per_request_t, operation_t, expected_num", + [ + (0.1, 0.01, 0), + (0.01, 0.015, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.21, 4), + ], ) @pytest.mark.asyncio async def test_read_rows_per_request_timeout(per_request_t, operation_t, expected_num): from google.cloud.bigtable.exceptions import RetryExceptionGroup + # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a,b: 0): + with mock.patch("random.uniform", side_effect=lambda a, b: 0): async with _make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() chunks = [core_exceptions.DeadlineExceeded("mock deadline")] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, sleep_time=per_request_t) - gen = await table.read_rows_stream(query, operation_timeout=operation_t, per_request_timeout=per_request_t) + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, sleep_time=per_request_t + ) + gen = await table.read_rows_stream( + query, + operation_timeout=operation_t, + per_request_timeout=per_request_t, + ) try: [row async for row in gen] except core_exceptions.DeadlineExceeded as e: @@ -218,20 +278,26 @@ async def test_read_rows_per_request_timeout(per_request_t, operation_t, expecte assert len(retry_exc.exceptions) == expected_num for sub_exc in retry_exc.exceptions: assert 
sub_exc.message == f"mock deadline" - assert read_rows.call_count == expected_num+1 + assert read_rows.call_count == expected_num + 1 called_kwargs = read_rows.call_args[1] assert called_kwargs["timeout"] == per_request_t + @pytest.mark.asyncio async def test_read_rows_idle_timeout(): from google.cloud.bigtable.client import ReadRowsIterator - from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) from google.cloud.bigtable.exceptions import IdleTimeout from google.cloud.bigtable.row_merger import RowMerger + chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) - with mock.patch.object(ReadRowsIterator, "_start_idle_timer") as start_idle_timer: + with mock.patch.object( + ReadRowsIterator, "_start_idle_timer" + ) as start_idle_timer: client = _make_client() table = client.get_table("instance", "table") query = ReadRowsQuery() @@ -255,9 +321,17 @@ async def test_read_rows_idle_timeout(): aclose.assert_called_once() aclose.assert_awaited() -@pytest.mark.parametrize("exc_type", - [InvalidChunk, core_exceptions.DeadlineExceeded, core_exceptions.InternalServerError, - core_exceptions.ServiceUnavailable, core_exceptions.TooManyRequests, core_exceptions.ResourceExhausted] + +@pytest.mark.parametrize( + "exc_type", + [ + InvalidChunk, + core_exceptions.DeadlineExceeded, + core_exceptions.InternalServerError, + core_exceptions.ServiceUnavailable, + core_exceptions.TooManyRequests, + core_exceptions.ResourceExhausted, + ], ) @pytest.mark.asyncio async def test_read_rows_retryable_error(exc_type): @@ -266,7 +340,9 @@ async def test_read_rows_retryable_error(exc_type): query = ReadRowsQuery() expected_error = exc_type("mock error") with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([expected_error]) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + [expected_error] + ) gen = await table.read_rows_stream(query, operation_timeout=0.1) try: [row async for row in gen] @@ -276,9 +352,18 @@ async def test_read_rows_retryable_error(exc_type): assert type(root_cause) == exc_type assert root_cause == expected_error -@pytest.mark.parametrize("exc_type", - [core_exceptions.Cancelled, core_exceptions.PreconditionFailed, core_exceptions.NotFound, - core_exceptions.PermissionDenied, core_exceptions.Conflict, core_exceptions.Aborted]) + +@pytest.mark.parametrize( + "exc_type", + [ + core_exceptions.Cancelled, + core_exceptions.PreconditionFailed, + core_exceptions.NotFound, + core_exceptions.PermissionDenied, + core_exceptions.Conflict, + core_exceptions.Aborted, + ], +) @pytest.mark.asyncio async def test_read_rows_non_retryable_error(exc_type): async with _make_client() as client: @@ -286,7 +371,9 @@ async def test_read_rows_non_retryable_error(exc_type): query = ReadRowsQuery() expected_error = exc_type("mock error") with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([expected_error]) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + [expected_error] + ) gen = await table.read_rows_stream(query, operation_timeout=0.1) try: [row async for row in gen] @@ -302,11 +389,14 @@ async def 
test_read_rows_request_stats(): chunks = [_make_chunk(row_key=b"test_1")] stats = _make_stats() with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, request_stats=stats) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, request_stats=stats + ) gen = await table.read_rows_stream(query) [row async for row in gen] assert gen.request_stats == stats + @pytest.mark.asyncio async def test_read_rows_request_stats_missing(): async with _make_client() as client: @@ -314,8 +404,9 @@ async def test_read_rows_request_stats_missing(): query = ReadRowsQuery() chunks = [_make_chunk(row_key=b"test_1")] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks, request_stats=None) + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, request_stats=None + ) gen = await table.read_rows_stream(query) [row async for row in gen] assert gen.request_stats is None - diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index b09c7f4b6..52d73aeb4 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -34,6 +34,7 @@ import mock # type: ignore from mock import AsyncMock # type: ignore + def parse_readrows_acceptance_tests(): dirname = os.path.dirname(__file__) filename = os.path.join(dirname, "./read-rows-acceptance-test.json") @@ -90,6 +91,7 @@ async def _scenerio_stream(): for expected, actual in zip_longest(test_case.results, results): assert actual == expected + @pytest.mark.parametrize( "test_case", parse_readrows_acceptance_tests(), ids=lambda t: t.description ) @@ -97,17 +99,24 @@ async def _scenerio_stream(): async def test_read_rows_scenario(test_case: ReadRowsTest): async def _make_gapic_stream(chunk_list: list[ReadRowsResponse]): from google.cloud.bigtable_v2 import ReadRowsResponse + async def inner(): for chunk in chunk_list: yield ReadRowsResponse(chunks=[chunk]) + return inner() + try: client = BigtableDataClient() table = client.get_table("instance", "table") results = [] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(test_case.chunks) - async for row in await table.read_rows_stream(query={}, operation_timeout=0.02): + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + test_case.chunks + ) + async for row in await table.read_rows_stream( + query={}, operation_timeout=0.02 + ): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, From 6f1c78100e21908ada162a80c2cd90ada8667bef Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 13:17:27 -0700 Subject: [PATCH 230/349] made invalid chunk a server error --- google/cloud/bigtable/row_merger.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index c627a4702..ac8be0750 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -36,7 +36,7 @@ ) -class InvalidChunk(RuntimeError): +class InvalidChunk(core_exceptions.ServerError): """Exception raised to invalid chunk data from back-end.""" @@ -79,7 +79,6 @@ def __init__( revise_on_retry, ) predicate = retries.if_exception_type( - InvalidChunk, core_exceptions.ServerError, 
core_exceptions.TooManyRequests, ) From 38f66e5abf7f3c5b8aca37590542075e8fc698b5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 13:45:38 -0700 Subject: [PATCH 231/349] moved invalid chunk with other exceptions --- google/cloud/bigtable/exceptions.py | 9 +++++++++ google/cloud/bigtable/row_merger.py | 6 ++---- tests/unit/test_client_read_rows.py | 2 +- tests/unit/test_read_rows_acceptance.py | 3 ++- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 20b375ead..bdde7c173 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -21,9 +21,18 @@ class IdleTimeout(core_exceptions.DeadlineExceeded): + """ + Exception raised by ReadRowsIterator when the generator + has been idle for longer than the internal idle_timeout. + """ + pass +class InvalidChunk(core_exceptions.ServerError): + """Exception raised to invalid chunk data from back-end.""" + + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/row_merger.py index ac8be0750..09d2abaea 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -18,6 +18,7 @@ from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable.row import Row, Cell, _LastScannedRow +from google.cloud.bigtable.exceptions import InvalidChunk import asyncio from functools import partial from google.api_core import retry_async as retries @@ -36,10 +37,6 @@ ) -class InvalidChunk(core_exceptions.ServerError): - """Exception raised to invalid chunk data from back-end.""" - - class RowMerger(AsyncIterable[Row]): """ RowMerger handles the logic of merging chunks from a ReadRowsResponse stream @@ -79,6 +76,7 @@ def __init__( revise_on_retry, ) predicate = retries.if_exception_type( + InvalidChunk, core_exceptions.ServerError, core_exceptions.TooManyRequests, ) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 812c75844..a5e8dc98f 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -21,7 +21,7 @@ from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable_v2.types import RequestStats from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.row_merger import InvalidChunk +from google.cloud.bigtable.exceptions import InvalidChunk # try/except added for compatibility with python < 3.8 try: diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 52d73aeb4..6ab5cd9ea 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -21,7 +21,8 @@ from google.cloud.bigtable_v2 import ReadRowsResponse from google.cloud.bigtable.client import BigtableDataClient -from google.cloud.bigtable.row_merger import RowMerger, InvalidChunk, StateMachine +from google.cloud.bigtable.exceptions import InvalidChunk +from google.cloud.bigtable.row_merger import RowMerger, StateMachine from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile From bf24c255c6e0d46a59d90d04d5190fbb63692a3b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 
Apr 2023 14:00:59 -0700 Subject: [PATCH 232/349] made row merger and classes private --- .../{row_merger.py => _row_merger.py} | 38 +++++++++---------- google/cloud/bigtable/client.py | 14 +++---- ...test_row_merger.py => test__row_merger.py} | 16 ++++---- tests/unit/test_client_read_rows.py | 4 +- tests/unit/test_read_rows_acceptance.py | 14 +++---- 5 files changed, 43 insertions(+), 43 deletions(-) rename google/cloud/bigtable/{row_merger.py => _row_merger.py} (97%) rename tests/unit/{test_row_merger.py => test__row_merger.py} (83%) diff --git a/google/cloud/bigtable/row_merger.py b/google/cloud/bigtable/_row_merger.py similarity index 97% rename from google/cloud/bigtable/row_merger.py rename to google/cloud/bigtable/_row_merger.py index 09d2abaea..cbbd3c286 100644 --- a/google/cloud/bigtable/row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -37,7 +37,7 @@ ) -class RowMerger(AsyncIterable[Row]): +class _RowMerger(AsyncIterable[Row]): """ RowMerger handles the logic of merging chunks from a ReadRowsResponse stream into a stream of Row objects. @@ -150,19 +150,19 @@ async def retryable_merge_rows( """ if revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed - self.request["rows"] = RowMerger._revise_request_rowset( + self.request["rows"] = _RowMerger._revise_request_rowset( self.request.get("rows", None), self.last_seen_row_key, self.emitted_rows, ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) - state_machine = StateMachine() + state_machine = _StateMachine() try: stream_task = asyncio.create_task( - RowMerger._generator_to_cache( + _RowMerger._generator_to_cache( cache, - RowMerger.merge_row_response_stream( + _RowMerger.merge_row_response_stream( new_gapic_stream, state_machine ), ) @@ -244,7 +244,7 @@ def _revise_request_rowset( @staticmethod async def merge_row_response_stream( - request_generator: AsyncIterable[ReadRowsResponse], state_machine: StateMachine + request_generator: AsyncIterable[ReadRowsResponse], state_machine: _StateMachine ) -> AsyncGenerator[Row | RequestStats, None]: """ Consume chunks from a ReadRowsResponse stream into a set of Rows @@ -277,7 +277,7 @@ async def merge_row_response_stream( raise InvalidChunk("read_rows completed with partial state remaining") -class StateMachine: +class _StateMachine: """ State Machine converts chunks into Rows @@ -296,14 +296,14 @@ def __init__(self): # represents either the last row emitted, or the last_scanned_key sent from backend # all future rows should have keys > last_seen_row_key self.last_seen_row_key: bytes | None = None - self.adapter: "RowBuilder" = RowBuilder() + self.adapter = _RowBuilder() self._reset_row() def _reset_row(self) -> None: """ Drops the current row and transitions to AWAITING_NEW_ROW to start a fresh one """ - self.current_state: State = AWAITING_NEW_ROW(self) + self.current_state: _State = AWAITING_NEW_ROW(self) self.current_family: str | None = None self.current_qualifier: bytes | None = None # self.expected_cell_size:int = 0 @@ -402,7 +402,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Failed to reset state machine") -class State(ABC): +class _State(ABC): """ Represents a state the state machine can be in @@ -410,15 +410,15 @@ class State(ABC): transitioning to the next state """ - def __init__(self, owner: StateMachine): + def __init__(self, owner: 
_StateMachine): self._owner = owner @abstractmethod - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": pass -class AWAITING_NEW_ROW(State): +class AWAITING_NEW_ROW(_State): """ Default state Awaiting a chunk to start a new row @@ -426,7 +426,7 @@ class AWAITING_NEW_ROW(State): - AWAITING_NEW_CELL: when a chunk with a row_key is received """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": if not chunk.row_key: raise InvalidChunk("New row is missing a row key") self._owner.adapter.start_row(chunk.row_key) @@ -435,7 +435,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": return AWAITING_NEW_CELL(self._owner).handle_chunk(chunk) -class AWAITING_NEW_CELL(State): +class AWAITING_NEW_CELL(_State): """ Represents a cell boundary witin a row @@ -444,7 +444,7 @@ class AWAITING_NEW_CELL(State): - AWAITING_CELL_VALUE: when the value is split across multiple chunks """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": is_split = chunk.value_size > 0 # expected_cell_size = chunk.value_size if is_split else len(chunk.value) # track latest cell data. New chunks won't send repeated data @@ -477,7 +477,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": return AWAITING_NEW_CELL(self._owner) -class AWAITING_CELL_VALUE(State): +class AWAITING_CELL_VALUE(_State): """ State that represents a split cell's continuation @@ -486,7 +486,7 @@ class AWAITING_CELL_VALUE(State): - AWAITING_CELL_VALUE: when additional value chunks are required """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": + def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("Found row key mid cell") @@ -509,7 +509,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "State": return AWAITING_NEW_CELL(self._owner) -class RowBuilder: +class _RowBuilder: """ called by state machine to build rows State machine makes the following guarantees: diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a2b114d20..65ed3f6a6 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -38,7 +38,7 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError -from google.cloud.bigtable.row_merger import RowMerger +from google.cloud.bigtable._row_merger import _RowMerger from google.cloud.bigtable_v2.types import RequestStats import google.auth.credentials @@ -399,7 +399,7 @@ async def read_rows_stream( # - RowMerger.merge_row_response_stream: parses chunks into rows # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute - row_merger = RowMerger( + row_merger = _RowMerger( request, self.client._gapic_client, cache_size=cache_size, @@ -685,8 +685,8 @@ class ReadRowsIterator(AsyncIterable[Row]): User-facing async generator for streaming read_rows responses """ - def __init__(self, merger: RowMerger): - self._merger_or_error: RowMerger | Exception = merger + def __init__(self, merger: _RowMerger): + self._merger_or_error: _RowMerger | Exception = merger 
self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self._idle_timeout_task: asyncio.Task[None] | None = None @@ -702,7 +702,7 @@ async def _start_idle_timer(self, idle_timeout: float): self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" def active(self): - return isinstance(self._merger_or_error, RowMerger) + return isinstance(self._merger_or_error, _RowMerger) async def _idle_timeout_coroutine(self, idle_timeout: float): while self.active(): @@ -722,7 +722,7 @@ async def __anext__(self) -> Row: if isinstance(self._merger_or_error, Exception): raise self._merger_or_error else: - merger = cast(RowMerger, self._merger_or_error) + merger = cast(_RowMerger, self._merger_or_error) try: self.last_interaction_time = time.time() next_item = await merger.__anext__() @@ -749,7 +749,7 @@ async def __anext__(self) -> Row: raise e async def _finish_with_error(self, e: Exception): - if isinstance(self._merger_or_error, RowMerger): + if isinstance(self._merger_or_error, _RowMerger): await self._merger_or_error.aclose() del self._merger_or_error self._merger_or_error = e diff --git a/tests/unit/test_row_merger.py b/tests/unit/test__row_merger.py similarity index 83% rename from tests/unit/test_row_merger.py rename to tests/unit/test__row_merger.py index 66ed852ac..968549308 100644 --- a/tests/unit/test_row_merger.py +++ b/tests/unit/test__row_merger.py @@ -1,7 +1,7 @@ import unittest from unittest import mock -from google.cloud.bigtable.row_merger import InvalidChunk +from google.cloud.bigtable.exceptions import InvalidChunk TEST_FAMILY = "family_name" TEST_QUALIFIER = b"column_qualifier" @@ -12,9 +12,9 @@ class TestRowMerger(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_merger import RowMerger + from google.cloud.bigtable._row_merger import _RowMerger - return RowMerger + return _RowMerger def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) @@ -23,9 +23,9 @@ def _make_one(self, *args, **kwargs): class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_merger import StateMachine + from google.cloud.bigtable._row_merger import _StateMachine - return StateMachine + return _StateMachine def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) @@ -38,15 +38,15 @@ class TestState(unittest.TestCase): class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row_merger import RowBuilder + from google.cloud.bigtable._row_merger import _RowBuilder - return RowBuilder + return _RowBuilder def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor(self): - with mock.patch("google.cloud.bigtable.row_merger.RowBuilder.reset") as reset: + with mock.patch.object(self._get_target_class(), "reset") as reset: self._make_one() reset.assert_called_once() row_builder = self._make_one() diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index a5e8dc98f..c50eddafd 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -290,7 +290,7 @@ async def test_read_rows_idle_timeout(): BigtableAsyncClient, ) from google.cloud.bigtable.exceptions import IdleTimeout - from google.cloud.bigtable.row_merger import RowMerger + from google.cloud.bigtable._row_merger import _RowMerger chunks = [_make_chunk(row_key=b"test_1"), 
_make_chunk(row_key=b"test_2")] with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: @@ -304,7 +304,7 @@ async def test_read_rows_idle_timeout(): gen = await table.read_rows_stream(query) # should start idle timer on creation start_idle_timer.assert_called_once() - with mock.patch.object(RowMerger, "aclose", AsyncMock()) as aclose: + with mock.patch.object(_RowMerger, "aclose", AsyncMock()) as aclose: # start idle timer with our own value await gen._start_idle_timer(0.1) # should timeout after being abandoned diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 6ab5cd9ea..c2cf1ec98 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -22,7 +22,7 @@ from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable.row_merger import RowMerger, StateMachine +from google.cloud.bigtable._row_merger import _RowMerger, _StateMachine from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile @@ -72,9 +72,9 @@ async def _scenerio_stream(): yield ReadRowsResponse(chunks=[chunk]) try: - state = StateMachine() + state = _StateMachine() results = [] - async for row in RowMerger.merge_row_response_stream(_scenerio_stream(), state): + async for row in _RowMerger.merge_row_response_stream(_scenerio_stream(), state): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, @@ -144,10 +144,10 @@ async def test_out_of_order_rows(): async def _row_stream(): yield ReadRowsResponse(last_scanned_row_key=b"a") - state = StateMachine() + state = _StateMachine() state.last_seen_row_key = b"a" with pytest.raises(InvalidChunk): - async for _ in RowMerger.merge_row_response_stream(_row_stream(), state): + async for _ in _RowMerger.merge_row_response_stream(_row_stream(), state): pass @@ -302,8 +302,8 @@ async def _process_chunks(*chunks): async def _row_stream(): yield ReadRowsResponse(chunks=chunks) - state = StateMachine() + state = _StateMachine() results = [] - async for row in RowMerger.merge_row_response_stream(_row_stream(), state): + async for row in _RowMerger.merge_row_response_stream(_row_stream(), state): results.append(row) return results From 4dbacb5c8c729ae7f30df4e2705bacb7e45f3d63 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 14:10:45 -0700 Subject: [PATCH 233/349] added read_rows --- google/cloud/bigtable/client.py | 11 ++++-- tests/unit/test_client_read_rows.py | 55 +++++++++++++++++------------ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 65ed3f6a6..b2c7553c8 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -416,8 +416,6 @@ async def read_rows( self, query: ReadRowsQuery | dict[str, Any], *, - shard: bool = False, - limit: int | None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, @@ -430,7 +428,14 @@ async def read_rows( Returns: - a list of the rows returned by the query """ - raise NotImplementedError + row_generator = await self.read_rows_stream( + query, + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, + ) + results = [row async for row in row_generator] + return results async def read_row( self, diff --git a/tests/unit/test_client_read_rows.py 
b/tests/unit/test_client_read_rows.py index c50eddafd..b6f910466 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -93,13 +93,28 @@ async def test_read_rows(): client = _make_client() table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] + chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 2 + assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" + await client.close() + +@pytest.mark.asyncio +async def test_read_rows_stream(): + client = _make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) gen = await table.read_rows_stream(query, operation_timeout=3) results = [row async for row in gen] - assert len(results) == 1 + assert len(results) == 2 assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" await client.close() @@ -120,8 +135,7 @@ async def test_read_rows_query_matches_request(include_app_profile): ) with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([]) - gen = await table.read_rows_stream(query, operation_timeout=3) - results = [row async for row in gen] + results = await table.read_rows(query, operation_timeout=3) assert len(results) == 0 call_request = read_rows.call_args_list[0][0][0] query_dict = query._to_dict() @@ -177,11 +191,10 @@ async def test_read_rows_operation_timeout(operation_timeout): read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( chunks, sleep_time=1 ) - gen = await table.read_rows_stream( - query, operation_timeout=operation_timeout - ) try: - [row async for row in gen] + await table.read_rows( + query, operation_timeout=operation_timeout + ) except core_exceptions.DeadlineExceeded as e: assert ( e.message @@ -215,11 +228,10 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( chunks, sleep_time=5 ) - gen = await table.read_rows_stream( - query, per_row_timeout=per_row_t, operation_timeout=operation_t - ) try: - [row async for row in gen] + await table.read_rows( + query, per_row_timeout=per_row_t, operation_timeout=operation_t + ) except core_exceptions.DeadlineExceeded as deadline_exc: retry_exc = deadline_exc.__cause__ if expected_num == 0: @@ -242,7 +254,7 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): (0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), - (0.05, 0.21, 4), + (0.05, 0.24, 4), ], ) @pytest.mark.asyncio @@ -261,13 +273,12 @@ async def test_read_rows_per_request_timeout(per_request_t, operation_t, expecte read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( chunks, sleep_time=per_request_t ) - gen = await table.read_rows_stream( - query, - operation_timeout=operation_t, - per_request_timeout=per_request_t, - ) try: - [row async for row in gen] + await table.read_rows( + query, + operation_timeout=operation_t, + 
per_request_timeout=per_request_t, + ) except core_exceptions.DeadlineExceeded as e: retry_exc = e.__cause__ if expected_num == 0: @@ -343,9 +354,8 @@ async def test_read_rows_retryable_error(exc_type): read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( [expected_error] ) - gen = await table.read_rows_stream(query, operation_timeout=0.1) try: - [row async for row in gen] + await table.read_rows(query, operation_timeout=0.1) except core_exceptions.DeadlineExceeded as e: retry_exc = e.__cause__ root_cause = retry_exc.exceptions[0] @@ -374,9 +384,8 @@ async def test_read_rows_non_retryable_error(exc_type): read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( [expected_error] ) - gen = await table.read_rows_stream(query, operation_timeout=0.1) try: - [row async for row in gen] + await table.read_rows(query, operation_timeout=0.1) except exc_type as e: assert e == expected_error From 6e6978e8b9783159031f8642272b66f59c8b898b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 14:11:26 -0700 Subject: [PATCH 234/349] ran blacken --- tests/unit/test_client_read_rows.py | 5 ++--- tests/unit/test_read_rows_acceptance.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index b6f910466..c36a402d7 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -102,6 +102,7 @@ async def test_read_rows(): assert results[1].row_key == b"test_2" await client.close() + @pytest.mark.asyncio async def test_read_rows_stream(): client = _make_client() @@ -192,9 +193,7 @@ async def test_read_rows_operation_timeout(operation_timeout): chunks, sleep_time=1 ) try: - await table.read_rows( - query, operation_timeout=operation_timeout - ) + await table.read_rows(query, operation_timeout=operation_timeout) except core_exceptions.DeadlineExceeded as e: assert ( e.message diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index c2cf1ec98..a51401b38 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -74,7 +74,9 @@ async def _scenerio_stream(): try: state = _StateMachine() results = [] - async for row in _RowMerger.merge_row_response_stream(_scenerio_stream(), state): + async for row in _RowMerger.merge_row_response_stream( + _scenerio_stream(), state + ): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, From 21f7846c44330846dd8ad10271b734a652c94aa4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 14:21:57 -0700 Subject: [PATCH 235/349] added comments --- google/cloud/bigtable/_row_merger.py | 36 +++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index cbbd3c286..69d07f150 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -36,6 +36,21 @@ AsyncGenerator, ) +""" +This module provides a set of classes for merging ReadRowsResponse chunks +into Row objects. + +- RowMerger is the highest level class, providing an interface for asynchronous + merging with or without retrues +- StateMachine is used internally to track the state of the merge, including + rows the current row and the keys of the rows that have been processed. + It processes a stream of chunks, and will raise InvalidChunk if it reaches + an invalid state. 
+- State classes track the current state of the StateMachine, and define what + to do on the next chunk. +- RowBuilder is used by the StateMachine to build a Row object. +""" + class _RowMerger(AsyncIterable[Row]): """ @@ -61,6 +76,15 @@ def __init__( per_request_timeout: float | None = None, revise_on_retry: bool = True, ): + """ + Args: + - request: the request dict to send to the Bigtable API + - client: the Bigtable client to use to make the request + - cache_size: the size of the buffer to use for caching rows from the network + - operation_timeout: the timeout to use for the entire operation, in seconds + - per_row_timeout: the timeout to use when waiting for each individual row, in seconds + - revise_on_retry: if True, retried request will be modified based on rows that have already been seen + """ self.last_seen_row_key: bytes | None = None self.emitted_rows: Set[bytes] = set() cache_size = max(cache_size, 0) @@ -100,16 +124,18 @@ def on_error_fn(exc): self.errors: List[Exception] = [] def __aiter__(self) -> AsyncIterator[Row | RequestStats]: + """Implements the AsyncIterable interface""" return self async def __anext__(self) -> Row | RequestStats: + """Implements the AsyncIterator interface""" if isinstance(self.stream, AsyncGenerator): return await self.stream.__anext__() else: raise asyncio.InvalidStateError("stream is closed") async def aclose(self): - # release resources + """Close the stream and release resources""" if isinstance(self.stream, AsyncGenerator): await self.stream.aclose() del self.stream @@ -220,6 +246,14 @@ def _revise_request_rowset( last_seen_row_key: bytes, emitted_rows: Set[bytes], ) -> dict[str, Any]: + """ + Revise the rows in the request to avoid ones we've already processed. + + Args: + - row_set: the row set from the request + - last_seen_row_key: the last row key encountered + - emitted_rows: the set of row keys that have already been emitted + """ # if user is doing a whole table scan, start a new one with the last seen key if row_set is None: last_seen = last_seen_row_key From 52e9dbfbfd41d650e4572ddb7313b1b82e872c7d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 14:56:27 -0700 Subject: [PATCH 236/349] added test for revise rowset --- google/cloud/bigtable/_row_merger.py | 6 +++--- tests/unit/test_client_read_rows.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 69d07f150..76df5a82f 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -177,9 +177,9 @@ async def retryable_merge_rows( if revise_on_retry and self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed self.request["rows"] = _RowMerger._revise_request_rowset( - self.request.get("rows", None), - self.last_seen_row_key, - self.emitted_rows, + row_set=self.request.get("rows", None), + last_seen_row_key=self.last_seen_row_key, + emitted_rows=self.emitted_rows, ) new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index c36a402d7..cb77e3759 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -418,3 +418,35 @@ async def test_read_rows_request_stats_missing(): gen = await table.read_rows_stream(query) [row async 
for row in gen] assert gen.request_stats is None + + +@pytest.mark.asyncio +async def test_read_rows_revise_request(): + from google.cloud.bigtable._row_merger import _RowMerger + + with mock.patch.object(_RowMerger, "_revise_request_rowset") as revise_rowset: + with mock.patch.object(_RowMerger, "aclose"): + revise_rowset.side_effect = [True, core_exceptions.Aborted("mock error")] + async with _make_client() as client: + table = client.get_table("instance", "table") + row_keys = [b"test_1", b"test_2", b"test_3"] + query = ReadRowsQuery(row_keys=row_keys) + chunks = [_make_chunk(row_key=b"test_1"), InvalidChunk("mock error")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( + chunks, request_stats=None + ) + try: + await table.read_rows(query) + except core_exceptions.Aborted: + revise_rowset.assert_called() + first_call_kwargs = revise_rowset.call_args_list[0].kwargs + assert first_call_kwargs["row_set"] == query._to_dict()["rows"] + assert first_call_kwargs["last_seen_row_key"] == b"test_1" + assert first_call_kwargs["emitted_rows"] == {b"test_1"} + second_call_kwargs = revise_rowset.call_args_list[1].kwargs + assert second_call_kwargs["row_set"] == True + assert second_call_kwargs["last_seen_row_key"] == b"test_1" + assert second_call_kwargs["emitted_rows"] == {b"test_1"} From 715be51152920239fcda416fb9a3dfdba0e69481 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:02:30 -0700 Subject: [PATCH 237/349] fixed lint issues --- google/cloud/bigtable/_row_merger.py | 2 +- google/cloud/bigtable/client.py | 4 +--- tests/unit/test_client_read_rows.py | 9 ++++++--- tests/unit/test_read_rows_acceptance.py | 9 +-------- 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 76df5a82f..bd21b3720 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -40,7 +40,7 @@ This module provides a set of classes for merging ReadRowsResponse chunks into Row objects. -- RowMerger is the highest level class, providing an interface for asynchronous +- RowMerger is the highest level class, providing an interface for asynchronous merging with or without retrues - StateMachine is used internally to track the state of the merge, including rows the current row and the keys of the rows that have been processed. 
diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b2c7553c8..2595f3f52 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -53,8 +53,6 @@ if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher - from google.cloud.bigtable.row import Row - from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule @@ -416,7 +414,7 @@ async def read_rows( self, query: ReadRowsQuery | dict[str, Any], *, - operation_timeout: int | float | None = 60, + operation_timeout: float = 60, per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, ) -> list[Row]: diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index cb77e3759..6465a8262 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -287,7 +287,7 @@ async def test_read_rows_per_request_timeout(per_request_t, operation_t, expecte assert f"{expected_num} failed attempts" in str(retry_exc) assert len(retry_exc.exceptions) == expected_num for sub_exc in retry_exc.exceptions: - assert sub_exc.message == f"mock deadline" + assert sub_exc.message == "mock deadline" assert read_rows.call_count == expected_num + 1 called_kwargs = read_rows.call_args[1] assert called_kwargs["timeout"] == per_request_t @@ -426,7 +426,10 @@ async def test_read_rows_revise_request(): with mock.patch.object(_RowMerger, "_revise_request_rowset") as revise_rowset: with mock.patch.object(_RowMerger, "aclose"): - revise_rowset.side_effect = [True, core_exceptions.Aborted("mock error")] + revise_rowset.side_effect = [ + "modified", + core_exceptions.Aborted("mock error"), + ] async with _make_client() as client: table = client.get_table("instance", "table") row_keys = [b"test_1", b"test_2", b"test_3"] @@ -447,6 +450,6 @@ async def test_read_rows_revise_request(): assert first_call_kwargs["last_seen_row_key"] == b"test_1" assert first_call_kwargs["emitted_rows"] == {b"test_1"} second_call_kwargs = revise_rowset.call_args_list[1].kwargs - assert second_call_kwargs["row_set"] == True + assert second_call_kwargs["row_set"] == "modified" assert second_call_kwargs["last_seen_row_key"] == b"test_1" assert second_call_kwargs["emitted_rows"] == {b"test_1"} diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index a51401b38..dfb9dc83e 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -17,6 +17,7 @@ from itertools import zip_longest import pytest +import mock from google.cloud.bigtable_v2 import ReadRowsResponse @@ -27,14 +28,6 @@ from .v2_client.test_row_merger import ReadRowsTest, TestFile -# try/except added for compatibility with python < 3.8 -try: - from unittest import mock - from unittest.mock import AsyncMock # type: ignore -except ImportError: # pragma: NO COVER - import mock # type: ignore - from mock import AsyncMock # type: ignore - def parse_readrows_acceptance_tests(): dirname = os.path.dirname(__file__) From 2f50cb7e3a710da497799f6e7a100e6759804ded Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:20:57 -0700 Subject: [PATCH 238/349] moved ReadRowsIterator into new file --- google/cloud/bigtable/client.py | 84 
+--------------------- google/cloud/bigtable/iterators.py | 108 +++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 83 deletions(-) create mode 100644 google/cloud/bigtable/iterators.py diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2595f3f52..9df71c95f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -19,7 +19,6 @@ cast, Any, Optional, - AsyncIterable, Set, TYPE_CHECKING, ) @@ -39,16 +38,13 @@ from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigtable._row_merger import _RowMerger -from google.cloud.bigtable_v2.types import RequestStats import google.auth.credentials -from google.api_core import exceptions as core_exceptions import google.auth._default from google.api_core import client_options as client_options_lib from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable.exceptions import RetryExceptionGroup -from google.cloud.bigtable.exceptions import IdleTimeout +from google.cloud.bigtable.iterators import ReadRowsIterator if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -681,81 +677,3 @@ async def read_modify_write_row( - GoogleAPIError exceptions from grpc call """ raise NotImplementedError - - -class ReadRowsIterator(AsyncIterable[Row]): - """ - User-facing async generator for streaming read_rows responses - """ - - def __init__(self, merger: _RowMerger): - self._merger_or_error: _RowMerger | Exception = merger - self.request_stats: RequestStats | None = None - self.last_interaction_time = time.time() - self._idle_timeout_task: asyncio.Task[None] | None = None - - async def _start_idle_timer(self, idle_timeout: float): - self.last_interaction_time = time.time() - if self._idle_timeout_task is not None: - self._idle_timeout_task.cancel() - self._idle_timeout_task = asyncio.create_task( - self._idle_timeout_coroutine(idle_timeout) - ) - if sys.version_info >= (3, 8): - self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" - - def active(self): - return isinstance(self._merger_or_error, _RowMerger) - - async def _idle_timeout_coroutine(self, idle_timeout: float): - while self.active(): - next_timeout = self.last_interaction_time + idle_timeout - await asyncio.sleep(next_timeout - time.time()) - if ( - self.last_interaction_time + idle_timeout < time.time() - and self.active() - ): - # idle timeout has expired - await self._finish_with_error(IdleTimeout("idle timeout expired")) - - def __aiter__(self): - return self - - async def __anext__(self) -> Row: - if isinstance(self._merger_or_error, Exception): - raise self._merger_or_error - else: - merger = cast(_RowMerger, self._merger_or_error) - try: - self.last_interaction_time = time.time() - next_item = await merger.__anext__() - if isinstance(next_item, RequestStats): - self.request_stats = next_item - return await self.__anext__() - else: - return next_item - except core_exceptions.RetryError: - # raised by AsyncRetry after operation deadline exceeded - new_exc = core_exceptions.DeadlineExceeded( - f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded" - ) - source_exc = None - if merger.errors: - source_exc = RetryExceptionGroup( - f"{len(merger.errors)} failed attempts", merger.errors - ) - new_exc.__cause__ = source_exc - await self._finish_with_error(new_exc) - raise new_exc from source_exc - except Exception as e: - await 
self._finish_with_error(e) - raise e - - async def _finish_with_error(self, e: Exception): - if isinstance(self._merger_or_error, _RowMerger): - await self._merger_or_error.aclose() - del self._merger_or_error - self._merger_or_error = e - if self._idle_timeout_task is not None: - self._idle_timeout_task.cancel() - self._idle_timeout_task = None diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py new file mode 100644 index 000000000..cf02bd314 --- /dev/null +++ b/google/cloud/bigtable/iterators.py @@ -0,0 +1,108 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import ( + cast, + AsyncIterable, +) +import asyncio +import time +import sys + +from google.cloud.bigtable._row_merger import _RowMerger +from google.cloud.bigtable_v2.types import RequestStats +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable.exceptions import RetryExceptionGroup +from google.cloud.bigtable.exceptions import IdleTimeout +from google.cloud.bigtable.row import Row + + +class ReadRowsIterator(AsyncIterable[Row]): + """ + User-facing async generator for streaming read_rows responses + """ + + def __init__(self, merger: _RowMerger): + self._merger_or_error: _RowMerger | Exception = merger + self.request_stats: RequestStats | None = None + self.last_interaction_time = time.time() + self._idle_timeout_task: asyncio.Task[None] | None = None + + async def _start_idle_timer(self, idle_timeout: float): + self.last_interaction_time = time.time() + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() + self._idle_timeout_task = asyncio.create_task( + self._idle_timeout_coroutine(idle_timeout) + ) + if sys.version_info >= (3, 8): + self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" + + def active(self): + return isinstance(self._merger_or_error, _RowMerger) + + async def _idle_timeout_coroutine(self, idle_timeout: float): + while self.active(): + next_timeout = self.last_interaction_time + idle_timeout + await asyncio.sleep(next_timeout - time.time()) + if ( + self.last_interaction_time + idle_timeout < time.time() + and self.active() + ): + # idle timeout has expired + await self._finish_with_error(IdleTimeout("idle timeout expired")) + + def __aiter__(self): + return self + + async def __anext__(self) -> Row: + if isinstance(self._merger_or_error, Exception): + raise self._merger_or_error + else: + merger = cast(_RowMerger, self._merger_or_error) + try: + self.last_interaction_time = time.time() + next_item = await merger.__anext__() + if isinstance(next_item, RequestStats): + self.request_stats = next_item + return await self.__anext__() + else: + return next_item + except core_exceptions.RetryError: + # raised by AsyncRetry after operation deadline exceeded + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded" + ) + source_exc = None + if merger.errors: + source_exc = 
RetryExceptionGroup( + f"{len(merger.errors)} failed attempts", merger.errors + ) + new_exc.__cause__ = source_exc + await self._finish_with_error(new_exc) + raise new_exc from source_exc + except Exception as e: + await self._finish_with_error(e) + raise e + + async def _finish_with_error(self, e: Exception): + if isinstance(self._merger_or_error, _RowMerger): + await self._merger_or_error.aclose() + del self._merger_or_error + self._merger_or_error = e + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() + self._idle_timeout_task = None From 28d5a7a79db481e6a6429be08da2d129d6a1ee6c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:27:18 -0700 Subject: [PATCH 239/349] fixed lint issues --- google/cloud/bigtable/client.py | 2 +- tests/unit/test_client.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 620fedd76..501180db8 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -347,7 +347,7 @@ async def read_rows_stream( per_row_timeout: int | float | None = 10, idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, - ) -> AsyncIterable[RowResponse]: + ) -> AsyncIterable[Row]: """ Returns a generator to asynchronously stream back row data. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ebe26c32e..78495d9e3 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -411,7 +411,9 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle try: client = _make_one(project="project-id") if refresh_interval is not None: - await client._manage_channel(channel_idx, refresh_interval, refresh_interval) + await client._manage_channel( + channel_idx, refresh_interval, refresh_interval + ) else: await client._manage_channel(channel_idx) except asyncio.CancelledError: @@ -423,9 +425,11 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" await client.close() + @pytest.mark.asyncio async def test__manage_channel_random(): import random + with mock.patch.object(asyncio, "sleep") as sleep: with mock.patch.object(random, "uniform") as uniform: uniform.return_value = 0 @@ -450,6 +454,7 @@ async def test__manage_channel_random(): assert found_min == min_val assert found_max == max_val + @pytest.mark.asyncio @pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) async def test__manage_channel_refresh(num_cycles): From d47c9416c94bb7505494fc73215276b4c5e1847a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:30:44 -0700 Subject: [PATCH 240/349] changed comment --- google/cloud/bigtable/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 9a039bfb0..b87f3ba8e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -399,7 +399,7 @@ async def read_rows_stream( if self.app_profile_id: request["app_profile_id"] = self.app_profile_id - # read_rows smart retries is implemented using a series of generators: + # read_rows smart retries is implemented using a series of iterators: # - client.read_rows: outputs raw ReadRowsResponse objects from backend. 
Has per_request_timeout # - RowMerger.merge_row_response_stream: parses chunks into rows # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout From d1bd12868a44c70b0e759ac6ab886f5661e1aeac Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:36:37 -0700 Subject: [PATCH 241/349] added comments to iterator --- google/cloud/bigtable/iterators.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index cf02bd314..6626cb64b 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -32,7 +32,7 @@ class ReadRowsIterator(AsyncIterable[Row]): """ - User-facing async generator for streaming read_rows responses + Async iterator for ReadRows responses. """ def __init__(self, merger: _RowMerger): @@ -42,6 +42,15 @@ def __init__(self, merger: _RowMerger): self._idle_timeout_task: asyncio.Task[None] | None = None async def _start_idle_timer(self, idle_timeout: float): + """ + Start a coroutine that will cancel a stream if no interaction + with the iterator occurs for the specified number of seconds. + + Subsequent access to the iterator will raise an IdleTimeout exception. + + Args: + - idle_timeout: number of seconds of inactivity before cancelling the stream + """ self.last_interaction_time = time.time() if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() @@ -52,9 +61,16 @@ async def _start_idle_timer(self, idle_timeout: float): self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" def active(self): + """ + Returns True if the iterator is still active and has not been closed + """ return isinstance(self._merger_or_error, _RowMerger) async def _idle_timeout_coroutine(self, idle_timeout: float): + """ + Coroutine that will cancel a stream if no interaction with the iterator + in the last `idle_timeout` seconds. + """ while self.active(): next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) @@ -66,9 +82,16 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): await self._finish_with_error(IdleTimeout("idle timeout expired")) def __aiter__(self): + """Implement the async iterator protocol.""" return self async def __anext__(self) -> Row: + """ + Implement the async iterator potocol. + + Return the next item in the stream if active, or + raise an exception if the stream has been closed. + """ if isinstance(self._merger_or_error, Exception): raise self._merger_or_error else: @@ -99,6 +122,10 @@ async def __anext__(self) -> Row: raise e async def _finish_with_error(self, e: Exception): + """ + Helper function to close the stream and clean up resources + after an error has occurred. 
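Taken together, the docstrings added in this patch describe how the iterator surface is meant to be consumed. A minimal usage sketch, assuming a table obtained from the data client and an existing query object (the sketch itself is illustrative, not code from the patch):

    from google.cloud.bigtable.exceptions import IdleTimeout

    async def print_rows(table, query):
        # read_rows_stream returns a ReadRowsIterator with an idle timer attached
        row_iterator = await table.read_rows_stream(query, operation_timeout=30)
        try:
            async for row in row_iterator:
                print(row.row_key)
        except IdleTimeout:
            # raised once the stream has been reclaimed after sitting
            # unused past the idle deadline
            print("stream closed due to inactivity")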
+ """ if isinstance(self._merger_or_error, _RowMerger): await self._merger_or_error.aclose() del self._merger_or_error From 039d623254c00c7eb69a70454d5f2f5ab700d2b2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:40:15 -0700 Subject: [PATCH 242/349] added var for idle timeout --- google/cloud/bigtable/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b87f3ba8e..7626ef46d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -414,7 +414,8 @@ async def read_rows_stream( ) output_generator = ReadRowsIterator(row_merger) # add idle timeout to clear resources if generator is abandoned - await output_generator._start_idle_timer(600) + idle_timeout_seconds = 600 + await output_generator._start_idle_timer(idle_timeout_seconds) return output_generator async def read_rows( From 3d34dcd8f5503b1a659f8832aaa496308b45aaa3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 6 Apr 2023 15:49:14 -0700 Subject: [PATCH 243/349] sped up acceptance tests --- tests/unit/test_read_rows_acceptance.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index dfb9dc83e..238c1b59e 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -107,12 +107,9 @@ async def inner(): table = client.get_table("instance", "table") results = [] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - test_case.chunks - ) - async for row in await table.read_rows_stream( - query={}, operation_timeout=0.02 - ): + # run once, then return error on retry + read_rows.side_effect = [_make_gapic_stream(test_case.chunks), RuntimeError] + async for row in await table.read_rows_stream(query={}): for cell in row: cell_result = ReadRowsTest.Result( row_key=cell.row_key, @@ -123,10 +120,7 @@ async def inner(): label=cell.labels[0] if cell.labels else "", ) results.append(cell_result) - except Exception as e: - retry_exc = e.__cause__ - root_exc = retry_exc.exceptions[0] - assert isinstance(root_exc, InvalidChunk) + except RuntimeError: results.append(ReadRowsTest.Result(error=True)) finally: await client.close() From 70fbff9b0ec612284b65df9181c322e8cbc39354 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 7 Apr 2023 11:53:43 -0700 Subject: [PATCH 244/349] reduced size of template by making subclass --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 301 +----------------- 2 files changed, 6 insertions(+), 297 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index b4ed4d2ea..8d3e0f3df 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit b4ed4d2ea730f8ed23a8c571daa6affd19ea3684 +Subproject commit 8d3e0f3df3b644d84a8ba0ed2b3701b6918a99f4 diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index c14d8a42b..0b937c566 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -39,7 +39,7 @@ from google.cloud.bigtable_v2.types import bigtable from .base import BigtableTransport, DEFAULT_CLIENT_INFO -from .grpc import 
BigtableGrpcTransport +from .grpc_asyncio import BigtableGrpcAsyncIOTransport class PooledMultiCallable: @@ -173,7 +173,7 @@ async def replace_channel( return new_channel -class PooledBigtableGrpcAsyncIOTransport(BigtableTransport): +class PooledBigtableGrpcAsyncIOTransport(BigtableGrpcAsyncIOTransport): """Pooled gRPC AsyncIO backend transport for Bigtable. Service for reading from and writing to existing Bigtable @@ -219,7 +219,7 @@ def create_channel( ) -> aio.Channel: """Create and return a PooledChannel object, representing a pool of gRPC AsyncIO channels Args: - pool_size (int): the number of channels in the pool + pool_size (int): The number of channels in the pool. host (Optional[str]): The host for the channel to use. credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These @@ -347,7 +347,8 @@ def __init__( ) # The base transport sets the host, credentials and scopes - super().__init__( + BigtableTransport.__init__( + self, host=host, credentials=credentials, credentials_file=credentials_file, @@ -378,16 +379,6 @@ def __init__( # Wrap messages. This must be done after self._grpc_channel exists self._prep_wrapped_messages(client_info) - @property - def grpc_channel(self) -> aio.Channel: - """Create the channel designed to connect to this service. - - This property caches on the instance; repeated calls return - the same channel. - """ - # Return the channel from cache. - return self._grpc_channel - async def replace_channel( self, channel_idx, grace=None, swap_sleep=1, new_channel=None ) -> aio.Channel: @@ -411,287 +402,5 @@ async def replace_channel( channel_idx, grace, swap_sleep, new_channel ) - @property - def read_rows( - self, - ) -> Callable[[bigtable.ReadRowsRequest], Awaitable[bigtable.ReadRowsResponse]]: - r"""Return a callable for the read rows method over gRPC. - - Streams back the contents of all requested rows in - key order, optionally applying the same Reader filter to - each. Depending on their size, rows and cells may be - broken up across multiple responses, but atomicity of - each row will still be preserved. See the - ReadRowsResponse documentation for details. - - Returns: - Callable[[~.ReadRowsRequest], - Awaitable[~.ReadRowsResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "read_rows" not in self._stubs: - self._stubs["read_rows"] = self.grpc_channel.unary_stream( - "/google.bigtable.v2.Bigtable/ReadRows", - request_serializer=bigtable.ReadRowsRequest.serialize, - response_deserializer=bigtable.ReadRowsResponse.deserialize, - ) - return self._stubs["read_rows"] - - @property - def sample_row_keys( - self, - ) -> Callable[ - [bigtable.SampleRowKeysRequest], Awaitable[bigtable.SampleRowKeysResponse] - ]: - r"""Return a callable for the sample row keys method over gRPC. - - Returns a sample of row keys in the table. The - returned row keys will delimit contiguous sections of - the table of approximately equal size, which can be used - to break up the data for distributed tasks like - mapreduces. - - Returns: - Callable[[~.SampleRowKeysRequest], - Awaitable[~.SampleRowKeysResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. 
- # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "sample_row_keys" not in self._stubs: - self._stubs["sample_row_keys"] = self.grpc_channel.unary_stream( - "/google.bigtable.v2.Bigtable/SampleRowKeys", - request_serializer=bigtable.SampleRowKeysRequest.serialize, - response_deserializer=bigtable.SampleRowKeysResponse.deserialize, - ) - return self._stubs["sample_row_keys"] - - @property - def mutate_row( - self, - ) -> Callable[[bigtable.MutateRowRequest], Awaitable[bigtable.MutateRowResponse]]: - r"""Return a callable for the mutate row method over gRPC. - - Mutates a row atomically. Cells already present in the row are - left unchanged unless explicitly changed by ``mutation``. - - Returns: - Callable[[~.MutateRowRequest], - Awaitable[~.MutateRowResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "mutate_row" not in self._stubs: - self._stubs["mutate_row"] = self.grpc_channel.unary_unary( - "/google.bigtable.v2.Bigtable/MutateRow", - request_serializer=bigtable.MutateRowRequest.serialize, - response_deserializer=bigtable.MutateRowResponse.deserialize, - ) - return self._stubs["mutate_row"] - - @property - def mutate_rows( - self, - ) -> Callable[[bigtable.MutateRowsRequest], Awaitable[bigtable.MutateRowsResponse]]: - r"""Return a callable for the mutate rows method over gRPC. - - Mutates multiple rows in a batch. Each individual row - is mutated atomically as in MutateRow, but the entire - batch is not executed atomically. - - Returns: - Callable[[~.MutateRowsRequest], - Awaitable[~.MutateRowsResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "mutate_rows" not in self._stubs: - self._stubs["mutate_rows"] = self.grpc_channel.unary_stream( - "/google.bigtable.v2.Bigtable/MutateRows", - request_serializer=bigtable.MutateRowsRequest.serialize, - response_deserializer=bigtable.MutateRowsResponse.deserialize, - ) - return self._stubs["mutate_rows"] - - @property - def check_and_mutate_row( - self, - ) -> Callable[ - [bigtable.CheckAndMutateRowRequest], - Awaitable[bigtable.CheckAndMutateRowResponse], - ]: - r"""Return a callable for the check and mutate row method over gRPC. - - Mutates a row atomically based on the output of a - predicate Reader filter. - - Returns: - Callable[[~.CheckAndMutateRowRequest], - Awaitable[~.CheckAndMutateRowResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. 
- if "check_and_mutate_row" not in self._stubs: - self._stubs["check_and_mutate_row"] = self.grpc_channel.unary_unary( - "/google.bigtable.v2.Bigtable/CheckAndMutateRow", - request_serializer=bigtable.CheckAndMutateRowRequest.serialize, - response_deserializer=bigtable.CheckAndMutateRowResponse.deserialize, - ) - return self._stubs["check_and_mutate_row"] - - @property - def ping_and_warm( - self, - ) -> Callable[ - [bigtable.PingAndWarmRequest], Awaitable[bigtable.PingAndWarmResponse] - ]: - r"""Return a callable for the ping and warm method over gRPC. - - Warm up associated instance metadata for this - connection. This call is not required but may be useful - for connection keep-alive. - - Returns: - Callable[[~.PingAndWarmRequest], - Awaitable[~.PingAndWarmResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "ping_and_warm" not in self._stubs: - self._stubs["ping_and_warm"] = self.grpc_channel.unary_unary( - "/google.bigtable.v2.Bigtable/PingAndWarm", - request_serializer=bigtable.PingAndWarmRequest.serialize, - response_deserializer=bigtable.PingAndWarmResponse.deserialize, - ) - return self._stubs["ping_and_warm"] - - @property - def read_modify_write_row( - self, - ) -> Callable[ - [bigtable.ReadModifyWriteRowRequest], - Awaitable[bigtable.ReadModifyWriteRowResponse], - ]: - r"""Return a callable for the read modify write row method over gRPC. - - Modifies a row atomically on the server. The method - reads the latest existing timestamp and value from the - specified columns and writes a new entry based on - pre-defined read/modify/write rules. The new value for - the timestamp is the greater of the existing timestamp - or the current server time. The method returns the new - contents of all modified cells. - - Returns: - Callable[[~.ReadModifyWriteRowRequest], - Awaitable[~.ReadModifyWriteRowResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "read_modify_write_row" not in self._stubs: - self._stubs["read_modify_write_row"] = self.grpc_channel.unary_unary( - "/google.bigtable.v2.Bigtable/ReadModifyWriteRow", - request_serializer=bigtable.ReadModifyWriteRowRequest.serialize, - response_deserializer=bigtable.ReadModifyWriteRowResponse.deserialize, - ) - return self._stubs["read_modify_write_row"] - - @property - def generate_initial_change_stream_partitions( - self, - ) -> Callable[ - [bigtable.GenerateInitialChangeStreamPartitionsRequest], - Awaitable[bigtable.GenerateInitialChangeStreamPartitionsResponse], - ]: - r"""Return a callable for the generate initial change stream - partitions method over gRPC. - - NOTE: This API is intended to be used by Apache Beam BigtableIO. - Returns the current list of partitions that make up the table's - change stream. The union of partitions will cover the entire - keyspace. Partitions can be read with ``ReadChangeStream``. - - Returns: - Callable[[~.GenerateInitialChangeStreamPartitionsRequest], - Awaitable[~.GenerateInitialChangeStreamPartitionsResponse]]: - A function that, when called, will call the underlying RPC - on the server. 
- """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "generate_initial_change_stream_partitions" not in self._stubs: - self._stubs[ - "generate_initial_change_stream_partitions" - ] = self.grpc_channel.unary_stream( - "/google.bigtable.v2.Bigtable/GenerateInitialChangeStreamPartitions", - request_serializer=bigtable.GenerateInitialChangeStreamPartitionsRequest.serialize, - response_deserializer=bigtable.GenerateInitialChangeStreamPartitionsResponse.deserialize, - ) - return self._stubs["generate_initial_change_stream_partitions"] - - @property - def read_change_stream( - self, - ) -> Callable[ - [bigtable.ReadChangeStreamRequest], Awaitable[bigtable.ReadChangeStreamResponse] - ]: - r"""Return a callable for the read change stream method over gRPC. - - NOTE: This API is intended to be used by Apache Beam - BigtableIO. Reads changes from a table's change stream. - Changes will reflect both user-initiated mutations and - mutations that are caused by garbage collection. - - Returns: - Callable[[~.ReadChangeStreamRequest], - Awaitable[~.ReadChangeStreamResponse]]: - A function that, when called, will call the underlying RPC - on the server. - """ - # Generate a "stub function" on-the-fly which will actually make - # the request. - # gRPC handles serialization and deserialization, so we just need - # to pass in the functions for each. - if "read_change_stream" not in self._stubs: - self._stubs["read_change_stream"] = self.grpc_channel.unary_stream( - "/google.bigtable.v2.Bigtable/ReadChangeStream", - request_serializer=bigtable.ReadChangeStreamRequest.serialize, - response_deserializer=bigtable.ReadChangeStreamResponse.deserialize, - ) - return self._stubs["read_change_stream"] - - def close(self): - return self.grpc_channel.close() - __all__ = ("PooledBigtableGrpcAsyncIOTransport",) From 383d8eb723f84308697a47644d7e79124f86bab2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 7 Apr 2023 11:59:06 -0700 Subject: [PATCH 245/349] reverted unintentional gapic generation changes --- .../bigtable_v2/services/bigtable/async_client.py | 15 +++++++-------- .../cloud/bigtable_v2/services/bigtable/client.py | 3 --- .../services/bigtable/transports/rest.py | 9 +++++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index 1233e1288..3465569b3 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -807,8 +807,8 @@ async def ping_and_warm( Args: request (Optional[Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]]): - The request object. Request message for client connection - keep-alive and warming. + The request object. Request message for client + connection keep-alive and warming. name (:class:`str`): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -1027,9 +1027,8 @@ def generate_initial_change_stream_partitions( Args: request (Optional[Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]]): - The request object. NOTE: This API is intended to be used - by Apache Beam BigtableIO. Request - message for + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. 
Request message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (:class:`str`): Required. The unique name of the table from which to get @@ -1127,9 +1126,9 @@ def read_change_stream( Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]]): - The request object. NOTE: This API is intended to be used - by Apache Beam BigtableIO. Request - message for Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be + used by Apache Beam BigtableIO. Request message for + Bigtable.ReadChangeStream. table_name (:class:`str`): Required. The unique name of the table from which to read a change stream. Values are of the form diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index 3165f9160..60622509a 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -382,9 +382,6 @@ def __init__( transport (Union[str, BigtableTransport]): The transport to use. If set to None, a transport is chosen automatically. - NOTE: "rest" transport functionality is currently in a - beta state (preview). We welcome your feedback via an - issue in this library's source repository. client_options (Optional[Union[google.api_core.client_options.ClientOptions, dict]]): Custom options for the client. It won't take effect if a ``transport`` instance is provided. (1) The ``api_endpoint`` property can be used to override the diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index 4343fbb90..ee9cb046f 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -471,6 +471,7 @@ def __call__( request (~.bigtable.CheckAndMutateRowRequest): The request object. Request message for Bigtable.CheckAndMutateRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -574,6 +575,7 @@ def __call__( by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -682,6 +684,7 @@ def __call__( request (~.bigtable.MutateRowRequest): The request object. Request message for Bigtable.MutateRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -780,6 +783,7 @@ def __call__( request (~.bigtable.MutateRowsRequest): The request object. Request message for BigtableService.MutateRows. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -877,6 +881,7 @@ def __call__( request (~.bigtable.PingAndWarmRequest): The request object. Request message for client connection keep-alive and warming. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -977,6 +982,7 @@ def __call__( The request object. NOTE: This API is intended to be used by Apache Beam BigtableIO. Request message for Bigtable.ReadChangeStream. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
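All of these generated docstrings describe the same two per-call knobs: a retry policy and a per-attempt timeout. For orientation, a call through the generated surface could configure them roughly as below; the values and the retry predicate are illustrative only, and `client`/`request` are assumed to exist.

    from google.api_core import exceptions as core_exceptions
    from google.api_core import retry as retries

    def mutate_with_policy(client, request):
        # retry transient errors within a 60s overall budget,
        # giving each individual attempt its own 30s timeout
        retry_policy = retries.Retry(
            predicate=retries.if_exception_type(
                core_exceptions.DeadlineExceeded,
                core_exceptions.ServiceUnavailable,
            ),
            timeout=60.0,
        )
        return client.mutate_row(request=request, retry=retry_policy, timeout=30.0)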
@@ -1077,6 +1083,7 @@ def __call__( request (~.bigtable.ReadModifyWriteRowRequest): The request object. Request message for Bigtable.ReadModifyWriteRow. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1177,6 +1184,7 @@ def __call__( request (~.bigtable.ReadRowsRequest): The request object. Request message for Bigtable.ReadRows. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1272,6 +1280,7 @@ def __call__( request (~.bigtable.SampleRowKeysRequest): The request object. Request message for Bigtable.SampleRowKeys. + retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. From 018fe03feef414d23bb2524133099116a89b5577 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 7 Apr 2023 12:37:43 -0700 Subject: [PATCH 246/349] updated submodule --- gapic-generator-fork | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 8d3e0f3df..14e00e28d 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 8d3e0f3df3b644d84a8ba0ed2b3701b6918a99f4 +Subproject commit 14e00e28d4fa7c50cbfbffe754e31d060285ee7b From 3764a98e3bb917ea6acac89455a331fa453d25d5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 7 Apr 2023 13:52:41 -0700 Subject: [PATCH 247/349] added default timeouts to table surface --- google/cloud/bigtable/client.py | 40 ++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 7626ef46d..9bb144599 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -310,6 +310,10 @@ def __init__( instance_id: str, table_id: str, app_profile_id: str | None = None, + *, + default_operation_timeout: float = 60, + default_per_row_timeout: float | None = 10, + default_per_request_timeout: float | None = None, ): """ Initialize a Table instance @@ -321,8 +325,13 @@ def __init__( instance_id is combined with the client's project to fully specify the instance table_id: The ID of the table. - app_profile_id: (Optional) The app profile to associate with requests. + app_profile_id: The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles + default_operation_timeout: (Optional) The default timeout, in seconds + default_per_row_timeout: (Optional) The default timeout for individual + rows in all read_rows requests, in seconds + default_per_request_timeout: (Optional) The default timeout for individual + rpc requests, in seconds Raises: - RuntimeError if called outside of an async run loop context """ @@ -335,6 +344,9 @@ def __init__( self.client.project, instance_id, table_id ) self.app_profile_id = app_profile_id + self.default_operation_timeout = default_operation_timeout + self.default_per_row_timeout = default_per_row_timeout + self.default_per_request_timeout = default_per_request_timeout # raises RuntimeError if called outside of an async run loop context try: self._register_instance_task = asyncio.create_task( @@ -352,8 +364,8 @@ async def read_rows_stream( query: ReadRowsQuery | dict[str, Any], *, cache_size: int = 0, - operation_timeout: float = 60, - per_row_timeout: float | None = 10, + operation_timeout: float | None = None, + per_row_timeout: float | None = None, per_request_timeout: float | None = None, ) -> ReadRowsIterator: """ @@ -374,14 +386,17 @@ async def read_rows_stream( Failed requests will be retried within the budget. time is only counted while actively waiting on the network. Completed and cached results can still be accessed after the deadline is complete, - with a DeadlineExceeded exception only raised after cached results are exhausted + with a DeadlineExceeded exception only raised after cached results are exhausted. + If None, defaults to the Table's default_operation_timeout - per_row_timeout: the time budget for a single row read, in seconds. If a row takes longer than per_row_timeout to complete, the ongoing network request will be with a DeadlineExceeded exception, and a retry may be attempted Applies only to the underlying network call. + If None, defaults to the Table's default_per_row_timeout - per_request_timeout: the time budget for an individual network request, in seconds. If it takes longer than this time to complete, the request will be cancelled with - a DeadlineExceeded exception, and a retry will be attempted + a DeadlineExceeded exception, and a retry will be attempted. 
+ If None, defaults to the Table's default_per_request_timeout Returns: - an asynchronous iterator that yields rows returned by the query @@ -392,8 +407,17 @@ async def read_rows_stream( - GoogleAPIError: raised if the request encounters an unrecoverable error - IdleTimeout: if iterator was abandoned """ + + operation_timeout = operation_timeout or self.default_operation_timeout + per_row_timeout = per_row_timeout or self.default_per_row_timeout + per_request_timeout = per_request_timeout or self.default_per_request_timeout + if operation_timeout <= 0: raise ValueError("operation_timeout must be greater than 0") + if per_row_timeout is not None and per_row_timeout <= 0: + raise ValueError("per_row_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout <= 0: + raise ValueError("per_request_timeout must be greater than 0") request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_path if self.app_profile_id: @@ -422,9 +446,9 @@ async def read_rows( self, query: ReadRowsQuery | dict[str, Any], *, - operation_timeout: float = 60, - per_row_timeout: int | float | None = 10, - per_request_timeout: int | float | None = None, + operation_timeout: float | None = None, + per_row_timeout: float | None = None, + per_request_timeout: float | None = None, ) -> list[Row]: """ Helper function that returns a full list instead of a generator From 745ae380399c32997db47c14131d762bd78782af Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 17:24:12 -0700 Subject: [PATCH 248/349] end after row_limit rows --- google/cloud/bigtable/_row_merger.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index bd21b3720..da330e733 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -87,9 +87,11 @@ def __init__( """ self.last_seen_row_key: bytes | None = None self.emitted_rows: Set[bytes] = set() + self.emit_count = 0 cache_size = max(cache_size, 0) self.request = request self.operation_timeout = operation_timeout + row_limit = request.get("rows_limit", 0) # lock in paramters for retryable wrapper self.partial_retryable = partial( self.retryable_merge_rows, @@ -98,6 +100,7 @@ def __init__( per_row_timeout, per_request_timeout, revise_on_retry, + row_limit, ) predicate = retries.if_exception_type( InvalidChunk, @@ -160,6 +163,7 @@ async def retryable_merge_rows( per_row_timeout, per_request_timeout, revise_on_retry, + row_limit, ) -> AsyncGenerator[Row | RequestStats, None]: """ Retryable wrapper for merge_rows. 
This function is called each time @@ -218,6 +222,9 @@ async def retryable_merge_rows( if not isinstance(new_item, _LastScannedRow): self.emitted_rows.add(new_item.row_key) yield new_item + self.emit_count += 1 + if row_limit and self.emit_count >= row_limit: + return # start new task for cache get_from_cache_task = asyncio.create_task(cache.get()) await asyncio.sleep(0) From 3d11d55c374b9e98868abef21084d684145da8db Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 17:24:47 -0700 Subject: [PATCH 249/349] changed retryable exceptions --- google/cloud/bigtable/_row_merger.py | 4 ++-- google/cloud/bigtable/exceptions.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index da330e733..38af9175b 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -103,9 +103,9 @@ def __init__( row_limit, ) predicate = retries.if_exception_type( - InvalidChunk, core_exceptions.ServerError, core_exceptions.TooManyRequests, + core_exceptions.Aborted, ) def on_error_fn(exc): @@ -388,7 +388,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: and chunk.row_key and self.last_seen_row_key >= chunk.row_key ): - raise InvalidChunk("Out of order row keys") + raise InvalidChunk("row keys should be strictly increasing") if chunk.reset_row: # reset row if requested self._handle_reset_chunk(chunk) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index bdde7c173..8ab4642c2 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -29,7 +29,7 @@ class IdleTimeout(core_exceptions.DeadlineExceeded): pass -class InvalidChunk(core_exceptions.ServerError): +class InvalidChunk(core_exceptions.GoogleAPICallError): """Exception raised to invalid chunk data from back-end.""" From f0403e7fb12443212cec1e2eabba7d6794fb5aa9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 17:25:41 -0700 Subject: [PATCH 250/349] changed warning stack level --- google/cloud/bigtable/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 501180db8..d2c74264f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -124,6 +124,7 @@ def __init__( f"{self.__class__.__name__} should be started in an " "asyncio event loop. Channel refresh will not be started", RuntimeWarning, + stacklevel=2, ) def start_background_channel_refresh(self) -> None: @@ -334,6 +335,7 @@ def __init__( "Table should be created in an asyncio event loop." 
" Instance will not be registered with client for refresh", RuntimeWarning, + stacklevel=2, ) async def read_rows_stream( From 84a775ac6e92458ea1314f1e763d70c12c4eab53 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:14:34 -0700 Subject: [PATCH 251/349] changed retryable errors --- google/cloud/bigtable/_row_merger.py | 9 ++++++--- tests/unit/test_client_read_rows.py | 19 +++++++++++-------- tests/unit/test_read_rows_acceptance.py | 4 ++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 38af9175b..11926a30f 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -103,8 +103,8 @@ def __init__( row_limit, ) predicate = retries.if_exception_type( - core_exceptions.ServerError, - core_exceptions.TooManyRequests, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, core_exceptions.Aborted, ) @@ -185,7 +185,10 @@ async def retryable_merge_rows( last_seen_row_key=self.last_seen_row_key, emitted_rows=self.emitted_rows, ) - new_gapic_stream = await gapic_fn(self.request, timeout=per_request_timeout) + new_gapic_stream = await gapic_fn( + self.request, + timeout=per_request_timeout, + ) cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) state_machine = _StateMachine() try: diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 6465a8262..a7e6bc709 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -335,12 +335,9 @@ async def test_read_rows_idle_timeout(): @pytest.mark.parametrize( "exc_type", [ - InvalidChunk, + core_exceptions.Aborted, core_exceptions.DeadlineExceeded, - core_exceptions.InternalServerError, core_exceptions.ServiceUnavailable, - core_exceptions.TooManyRequests, - core_exceptions.ResourceExhausted, ], ) @pytest.mark.asyncio @@ -370,7 +367,10 @@ async def test_read_rows_retryable_error(exc_type): core_exceptions.NotFound, core_exceptions.PermissionDenied, core_exceptions.Conflict, - core_exceptions.Aborted, + core_exceptions.InternalServerError, + core_exceptions.TooManyRequests, + core_exceptions.ResourceExhausted, + InvalidChunk, ], ) @pytest.mark.asyncio @@ -428,13 +428,16 @@ async def test_read_rows_revise_request(): with mock.patch.object(_RowMerger, "aclose"): revise_rowset.side_effect = [ "modified", - core_exceptions.Aborted("mock error"), + core_exceptions.Cancelled("mock error"), ] async with _make_client() as client: table = client.get_table("instance", "table") row_keys = [b"test_1", b"test_2", b"test_3"] query = ReadRowsQuery(row_keys=row_keys) - chunks = [_make_chunk(row_key=b"test_1"), InvalidChunk("mock error")] + chunks = [ + _make_chunk(row_key=b"test_1"), + core_exceptions.Aborted("mock retryable error"), + ] with mock.patch.object( table.client._gapic_client, "read_rows" ) as read_rows: @@ -443,7 +446,7 @@ async def test_read_rows_revise_request(): ) try: await table.read_rows(query) - except core_exceptions.Aborted: + except core_exceptions.Cancelled: revise_rowset.assert_called() first_call_kwargs = revise_rowset.call_args_list[0].kwargs assert first_call_kwargs["row_set"] == query._to_dict()["rows"] diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 238c1b59e..94b4e3829 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -108,7 +108,7 @@ async def inner(): results = [] with 
mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: # run once, then return error on retry - read_rows.side_effect = [_make_gapic_stream(test_case.chunks), RuntimeError] + read_rows.return_value = _make_gapic_stream(test_case.chunks) async for row in await table.read_rows_stream(query={}): for cell in row: cell_result = ReadRowsTest.Result( @@ -120,7 +120,7 @@ async def inner(): label=cell.labels[0] if cell.labels else "", ) results.append(cell_result) - except RuntimeError: + except InvalidChunk: results.append(ReadRowsTest.Result(error=True)) finally: await client.close() From 15a9d235fc4e0d800331a98e2e30abf4a852219f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:17:09 -0700 Subject: [PATCH 252/349] improved comments --- google/cloud/bigtable/_row_merger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 11926a30f..83adff679 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -41,9 +41,9 @@ into Row objects. - RowMerger is the highest level class, providing an interface for asynchronous - merging with or without retrues + merging end-to-end - StateMachine is used internally to track the state of the merge, including - rows the current row and the keys of the rows that have been processed. + the current row key and the keys of the rows that have been processed. It processes a stream of chunks, and will raise InvalidChunk if it reaches an invalid state. - State classes track the current state of the StateMachine, and define what @@ -58,7 +58,7 @@ class _RowMerger(AsyncIterable[Row]): into a stream of Row objects. RowMerger.merge_row_response_stream takes in a stream of ReadRowsResponse - and handles turns them into a stream of Row objects using an internal + and turns them into a stream of Row objects using an internal StateMachine. 
RowMerger(request, client) handles row merging logic end-to-end, including From 8636654270ec0e5f161419f279d097877152ecba Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:36:30 -0700 Subject: [PATCH 253/349] improved idle timeouts --- google/cloud/bigtable/client.py | 3 +-- google/cloud/bigtable/iterators.py | 9 ++++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 9bb144599..4cc98980f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -438,7 +438,7 @@ async def read_rows_stream( ) output_generator = ReadRowsIterator(row_merger) # add idle timeout to clear resources if generator is abandoned - idle_timeout_seconds = 600 + idle_timeout_seconds = 300 await output_generator._start_idle_timer(idle_timeout_seconds) return output_generator @@ -492,7 +492,6 @@ async def read_rows_sharded( cache_size_limit: int | None = None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, - idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, ) -> ReadRowsIterator: """ diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 6626cb64b..6a6debf41 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -79,7 +79,14 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): and self.active() ): # idle timeout has expired - await self._finish_with_error(IdleTimeout("idle timeout expired")) + await self._finish_with_error( + IdleTimeout( + ( + "Timed out waiting for next row to be consumed " + f"(idle_timeout is {idle_timeout:0.1f}s)." + ) + ) + ) def __aiter__(self): """Implement the async iterator protocol.""" From 1aca392d37925635becb072b20a8fdfefd916a18 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:42:24 -0700 Subject: [PATCH 254/349] changed retry parameters --- google/cloud/bigtable/_row_merger.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 83adff679..de78a5d81 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -83,6 +83,7 @@ def __init__( - cache_size: the size of the buffer to use for caching rows from the network - operation_timeout: the timeout to use for the entire operation, in seconds - per_row_timeout: the timeout to use when waiting for each individual row, in seconds + - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds - revise_on_retry: if True, retried request will be modified based on rows that have already been seen """ self.last_seen_row_key: bytes | None = None @@ -117,7 +118,7 @@ def on_error_fn(exc): timeout=self.operation_timeout, initial=0.1, multiplier=2, - maximum=1, + maximum=60, on_error=on_error_fn, is_generator=True, ) From 45fef1e7b35fcc3f1fb142c45e956ac0beb1b62b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:43:36 -0700 Subject: [PATCH 255/349] added limit revision to each retry --- google/cloud/bigtable/_row_merger.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index de78a5d81..1322eba4a 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -186,6 +186,13 @@ async def retryable_merge_rows( 
last_seen_row_key=self.last_seen_row_key, emitted_rows=self.emitted_rows, ) + # revise next request's row limit based on number emitted + if row_limit: + new_limit = row_limit - self.emit_count + if new_limit <= 0: + return + else: + self.request["rows_limit"] = new_limit new_gapic_stream = await gapic_fn( self.request, timeout=per_request_timeout, From 951a77bc098f0d4833086621b8c1a3739644b9bf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:43:52 -0700 Subject: [PATCH 256/349] removed unneeded check --- google/cloud/bigtable/_row_merger.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 1322eba4a..e6fa040f0 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -450,8 +450,6 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): if chunk.value: raise InvalidChunk("Reset chunk has a value") self._reset_row() - if not isinstance(self.current_state, AWAITING_NEW_ROW): - raise InvalidChunk("Failed to reset state machine") class _State(ABC): From e3a0b666c734602e5aa3c54d64702372fe19751e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 12 Apr 2023 18:49:45 -0700 Subject: [PATCH 257/349] fixed idle timeout test --- google/cloud/bigtable/iterators.py | 4 ++-- tests/unit/test_client_read_rows.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 6a6debf41..bc3bb4ffc 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -82,8 +82,8 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): await self._finish_with_error( IdleTimeout( ( - "Timed out waiting for next row to be consumed " - f"(idle_timeout is {idle_timeout:0.1f}s)." + "Timed out waiting for next Row to be consumed. " + f"(idle_timeout={idle_timeout:0.1f}s)" ) ) ) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index a7e6bc709..5ddc6c70b 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -327,7 +327,11 @@ async def test_read_rows_idle_timeout(): await client.close() with pytest.raises(IdleTimeout) as e: await gen.__anext__() - assert e.value.message == "idle timeout expired" + + expected_msg = ( + "Timed out waiting for next Row to be consumed. 
(idle_timeout=0.1s)" + ) + assert e.value.message == expected_msg aclose.assert_called_once() aclose.assert_awaited() From 6089934bc69c0fba0da0dd9dc4a7d8e4b04414e6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 13 Apr 2023 10:28:26 -0700 Subject: [PATCH 258/349] removed tracking of emitted rows --- google/cloud/bigtable/_row_merger.py | 22 ++++++---------------- tests/unit/test_client_read_rows.py | 2 -- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index e6fa040f0..66d532d7e 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -29,7 +29,6 @@ from typing import ( cast, List, - Set, Any, AsyncIterable, AsyncIterator, @@ -87,7 +86,6 @@ def __init__( - revise_on_retry: if True, retried request will be modified based on rows that have already been seen """ self.last_seen_row_key: bytes | None = None - self.emitted_rows: Set[bytes] = set() self.emit_count = 0 cache_size = max(cache_size, 0) self.request = request @@ -144,7 +142,6 @@ async def aclose(self): await self.stream.aclose() del self.stream self.stream = None - self.emitted_rows.clear() self.last_seen_row_key = None @staticmethod @@ -175,7 +172,7 @@ async def retryable_merge_rows( - cache for the stream - state machine to hold merge chunks received from stream Some state is shared between retries: - - last_seen_row_key and emitted_rows are used to ensure that + - last_seen_row_key is used to ensure that duplicate rows are not emitted - request is stored and (optionally) modified on each retry """ @@ -184,7 +181,6 @@ async def retryable_merge_rows( self.request["rows"] = _RowMerger._revise_request_rowset( row_set=self.request.get("rows", None), last_seen_row_key=self.last_seen_row_key, - emitted_rows=self.emitted_rows, ) # revise next request's row limit based on number emitted if row_limit: @@ -223,15 +219,15 @@ async def retryable_merge_rows( # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item - elif ( - isinstance(new_item, Row) - and new_item.row_key not in self.emitted_rows + # ignore rows that have already been emitted + elif isinstance(new_item, Row) and ( + self.last_seen_row_key is None + or new_item.row_key > self.last_seen_row_key ): self.last_seen_row_key = new_item.row_key # don't yeild _LastScannedRow markers; they # should only update last_seen_row_key if not isinstance(new_item, _LastScannedRow): - self.emitted_rows.add(new_item.row_key) yield new_item self.emit_count += 1 if row_limit and self.emit_count >= row_limit: @@ -262,7 +258,6 @@ async def retryable_merge_rows( def _revise_request_rowset( row_set: dict[str, Any] | None, last_seen_row_key: bytes, - emitted_rows: Set[bytes], ) -> dict[str, Any]: """ Revise the rows in the request to avoid ones we've already processed. 
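A concrete illustration of the row-key revision performed here, with hypothetical values (the row-range handling follows in the function body):

    # state carried across retries
    last_seen_row_key = b"row-2"
    requested_keys = [b"row-1", b"row-2", b"row-3"]

    # keys at or before the last seen key are dropped from the retried request
    revised_keys = [k for k in requested_keys if k > last_seen_row_key]
    assert revised_keys == [b"row-3"]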
@@ -270,7 +265,6 @@ def _revise_request_rowset( Args: - row_set: the row set from the request - last_seen_row_key: the last row key encountered - - emitted_rows: the set of row keys that have already been emitted """ # if user is doing a whole table scan, start a new one with the last seen key if row_set is None: @@ -284,7 +278,7 @@ def _revise_request_rowset( row_keys: list[bytes] = row_set.get("row_keys", []) adjusted_keys = [] for key in row_keys: - if key not in emitted_rows: + if key > last_seen_row_key: adjusted_keys.append(key) # if user specified only a single range, set start to the last seen key row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) @@ -344,7 +338,6 @@ class _StateMachine: """ def __init__(self): - self.completed_row_keys: Set[bytes] = set({}) # represents either the last row emitted, or the last_scanned_key sent from backend # all future rows should have keys > last_seen_row_key self.last_seen_row_key: bytes | None = None @@ -392,8 +385,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: Returns a Row if the chunk completes a row, otherwise returns None """ - if chunk.row_key in self.completed_row_keys: - raise InvalidChunk(f"duplicate row key: {chunk.row_key.decode()}") if ( self.last_seen_row_key and chunk.row_key @@ -425,7 +416,6 @@ def _handle_complete_row(self, complete_row: Row) -> None: or when a scan heartbeat is received """ self.last_seen_row_key = complete_row.row_key - self.completed_row_keys.add(complete_row.row_key) self._reset_row() def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 5ddc6c70b..1e6f062b1 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -455,8 +455,6 @@ async def test_read_rows_revise_request(): first_call_kwargs = revise_rowset.call_args_list[0].kwargs assert first_call_kwargs["row_set"] == query._to_dict()["rows"] assert first_call_kwargs["last_seen_row_key"] == b"test_1" - assert first_call_kwargs["emitted_rows"] == {b"test_1"} second_call_kwargs = revise_rowset.call_args_list[1].kwargs assert second_call_kwargs["row_set"] == "modified" assert second_call_kwargs["last_seen_row_key"] == b"test_1" - assert second_call_kwargs["emitted_rows"] == {b"test_1"} From fb4b0ca7fdb7f46d377d1435fe5a139e67e09e9d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 14 Apr 2023 12:42:27 -0700 Subject: [PATCH 259/349] removed revise_on_retry flag --- google/cloud/bigtable/_row_merger.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 66d532d7e..d991e8535 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -73,7 +73,6 @@ def __init__( operation_timeout: float | None = None, per_row_timeout: float | None = None, per_request_timeout: float | None = None, - revise_on_retry: bool = True, ): """ Args: @@ -83,7 +82,6 @@ def __init__( - operation_timeout: the timeout to use for the entire operation, in seconds - per_row_timeout: the timeout to use when waiting for each individual row, in seconds - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds - - revise_on_retry: if True, retried request will be modified based on rows that have already been seen """ self.last_seen_row_key: bytes | None = None self.emit_count = 0 @@ -98,7 +96,6 @@ def __init__( cache_size, 
per_row_timeout, per_request_timeout, - revise_on_retry, row_limit, ) predicate = retries.if_exception_type( @@ -160,7 +157,6 @@ async def retryable_merge_rows( cache_size, per_row_timeout, per_request_timeout, - revise_on_retry, row_limit, ) -> AsyncGenerator[Row | RequestStats, None]: """ @@ -176,7 +172,7 @@ async def retryable_merge_rows( duplicate rows are not emitted - request is stored and (optionally) modified on each retry """ - if revise_on_retry and self.last_seen_row_key is not None: + if self.last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed self.request["rows"] = _RowMerger._revise_request_rowset( row_set=self.request.get("rows", None), From 83b908c7afa80110c71ef24890c47cb8082903b6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 14 Apr 2023 12:47:09 -0700 Subject: [PATCH 260/349] changed initial sleep --- google/cloud/bigtable/_row_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index d991e8535..c560f87a7 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -111,7 +111,7 @@ def on_error_fn(exc): retry = retries.AsyncRetry( predicate=predicate, timeout=self.operation_timeout, - initial=0.1, + initial=0.01, multiplier=2, maximum=60, on_error=on_error_fn, From 56885614ca9a243a0bfa38a2c3f56e32ee327085 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 14 Apr 2023 12:58:05 -0700 Subject: [PATCH 261/349] added extra timeout check --- google/cloud/bigtable/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4cc98980f..31ddd9fdf 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -418,6 +418,8 @@ async def read_rows_stream( raise ValueError("per_row_timeout must be greater than 0") if per_request_timeout is not None and per_request_timeout <= 0: raise ValueError("per_request_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout > operation_timeout: + raise ValueError("per_request_timeout must be less than operation_timeout") request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_path if self.app_profile_id: From ff3724da3e6906b411e6bab040749d6ad4f38c66 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 13:22:33 -0700 Subject: [PATCH 262/349] removed outdated test --- tests/unit/test_client_read_rows.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 1e6f062b1..a6b5889bf 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -249,7 +249,6 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): @pytest.mark.parametrize( "per_request_t, operation_t, expected_num", [ - (0.1, 0.01, 0), (0.01, 0.015, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), From 78a309ca288747b095940ebd36ec77ccf9c70401 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 15:21:08 -0700 Subject: [PATCH 263/349] fixed type annotations --- google/cloud/bigtable/_row_merger.py | 12 +++++++----- google/cloud/bigtable/client.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index c560f87a7..57bea122e 100644 --- 
a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -33,6 +33,8 @@ AsyncIterable, AsyncIterator, AsyncGenerator, + Callable, + Awaitable, ) """ @@ -153,11 +155,11 @@ async def _generator_to_cache( async def retryable_merge_rows( self, - gapic_fn, - cache_size, - per_row_timeout, - per_request_timeout, - row_limit, + gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], + cache_size: int, + per_row_timeout: float | None, + per_request_timeout: float | None, + row_limit: int, ) -> AsyncGenerator[Row | RequestStats, None]: """ Retryable wrapper for merge_rows. This function is called each time diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 31ddd9fdf..c540622ef 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -325,7 +325,7 @@ def __init__( instance_id is combined with the client's project to fully specify the instance table_id: The ID of the table. - app_profile_id: The app profile to associate with requests. + app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles default_operation_timeout: (Optional) The default timeout, in seconds default_per_row_timeout: (Optional) The default timeout for individual From c50ae18cffe5aecad67174322dfcdc368b958859 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 15:21:24 -0700 Subject: [PATCH 264/349] added slots --- google/cloud/bigtable/_row_merger.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 57bea122e..a34900803 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -335,6 +335,14 @@ class _StateMachine: the state machine will raise an InvalidChunk exception """ + __slots__ = ( + "current_state", + "current_family", + "current_qualifier", + "last_seen_row_key", + "adapter", + ) + def __init__(self): # represents either the last row emitted, or the last_scanned_key sent from backend # all future rows should have keys > last_seen_row_key @@ -448,6 +456,8 @@ class _State(ABC): transitioning to the next state """ + __slots__ = ("_owner",) + def __init__(self, owner: _StateMachine): self._owner = owner @@ -560,6 +570,8 @@ class _RowBuilder: a row. 
""" + __slots__ = "current_key", "working_cell", "working_value", "completed_cells" + def __init__(self): # initialize state self.reset() From d73121bbfca74c28df44faab15af970e5def62bf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 15:49:57 -0700 Subject: [PATCH 265/349] renamed cache to buffer --- google/cloud/bigtable/_row_merger.py | 52 ++++++++++++++-------------- google/cloud/bigtable/client.py | 20 +++++------ tests/unit/test_client_read_rows.py | 8 ++--- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index a34900803..ef20a0d5d 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -71,7 +71,7 @@ def __init__( request: dict[str, Any], client: BigtableAsyncClient, *, - cache_size: int = 0, + buffer_size: int = 0, operation_timeout: float | None = None, per_row_timeout: float | None = None, per_request_timeout: float | None = None, @@ -80,14 +80,14 @@ def __init__( Args: - request: the request dict to send to the Bigtable API - client: the Bigtable client to use to make the request - - cache_size: the size of the buffer to use for caching rows from the network + - buffer_size: the size of the buffer to use for caching rows from the network - operation_timeout: the timeout to use for the entire operation, in seconds - per_row_timeout: the timeout to use when waiting for each individual row, in seconds - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds """ self.last_seen_row_key: bytes | None = None self.emit_count = 0 - cache_size = max(cache_size, 0) + buffer_size = max(buffer_size, 0) self.request = request self.operation_timeout = operation_timeout row_limit = request.get("rows_limit", 0) @@ -95,7 +95,7 @@ def __init__( self.partial_retryable = partial( self.retryable_merge_rows, client.read_rows, - cache_size, + buffer_size, per_row_timeout, per_request_timeout, row_limit, @@ -144,19 +144,19 @@ async def aclose(self): self.last_seen_row_key = None @staticmethod - async def _generator_to_cache( - cache: asyncio.Queue[Any], input_generator: AsyncIterable[Any] + async def _generator_to_buffer( + buffer: asyncio.Queue[Any], input_generator: AsyncIterable[Any] ) -> None: """ - Helper function to push items from an async generator into a cache + Helper function to push items from an async generator into a buffer """ async for item in input_generator: - await cache.put(item) + await buffer.put(item) async def retryable_merge_rows( self, gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], - cache_size: int, + buffer_size: int, per_row_timeout: float | None, per_request_timeout: float | None, row_limit: int, @@ -167,7 +167,7 @@ async def retryable_merge_rows( Some fresh state is created on each retry: - grpc network stream - - cache for the stream + - buffer for the stream - state machine to hold merge chunks received from stream Some state is shared between retries: - last_seen_row_key is used to ensure that @@ -191,29 +191,29 @@ async def retryable_merge_rows( self.request, timeout=per_request_timeout, ) - cache: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=cache_size) + buffer: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=buffer_size) state_machine = _StateMachine() try: stream_task = asyncio.create_task( - _RowMerger._generator_to_cache( - cache, + _RowMerger._generator_to_buffer( + buffer, _RowMerger.merge_row_response_stream( new_gapic_stream, 
state_machine ), ) ) - get_from_cache_task = asyncio.create_task(cache.get()) + get_from_buffer_task = asyncio.create_task(buffer.get()) # sleep to allow other tasks to run await asyncio.sleep(0) - # read from state machine and push into cache - # when finished, stream will be done, cache will be empty, but get_from_cache_task will still be waiting + # read from state machine and push into buffer + # when finished, stream will be done, buffer will be empty, but get_from_buffer_task will still be waiting while ( not stream_task.done() - or not cache.empty() - or get_from_cache_task.done() + or not buffer.empty() + or get_from_buffer_task.done() ): - if get_from_cache_task.done(): - new_item = get_from_cache_task.result() + if get_from_buffer_task.done(): + new_item = get_from_buffer_task.result() # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item @@ -230,17 +230,17 @@ async def retryable_merge_rows( self.emit_count += 1 if row_limit and self.emit_count >= row_limit: return - # start new task for cache - get_from_cache_task = asyncio.create_task(cache.get()) + # start new task for buffer + get_from_buffer_task = asyncio.create_task(buffer.get()) await asyncio.sleep(0) else: - # wait for either the stream to finish, or a new item to enter the cache + # wait for either the stream to finish, or a new item to enter the buffer first_finish = asyncio.wait( - [stream_task, get_from_cache_task], + [stream_task, get_from_buffer_task], return_when=asyncio.FIRST_COMPLETED, ) await asyncio.wait_for(first_finish, per_row_timeout) - # stream and cache are complete. if there's an exception, raise it + # stream and buffer are complete. if there's an exception, raise it if stream_task.exception(): raise cast(Exception, stream_task.exception()) except asyncio.TimeoutError: @@ -250,7 +250,7 @@ async def retryable_merge_rows( ) finally: stream_task.cancel() - get_from_cache_task.cancel() + get_from_buffer_task.cancel() @staticmethod def _revise_request_rowset( diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c540622ef..4d1112aaa 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -363,7 +363,7 @@ async def read_rows_stream( self, query: ReadRowsQuery | dict[str, Any], *, - cache_size: int = 0, + buffer_size: int = 0, operation_timeout: float | None = None, per_row_timeout: float | None = None, per_request_timeout: float | None = None, @@ -373,20 +373,20 @@ async def read_rows_stream( Failed requests within operation_timeout and operation_deadline policies will be retried. - By default, row data is streamed eagerly over the network, and fully cached in memory - in the iterator, which can be consumed as needed. The size of the iterator cache can - be configured with cache_size_limit. When the cache is full, the read_rows_stream will pause + By default, row data is streamed eagerly over the network, and fully bufferd in memory + in the iterator, which can be consumed as needed. The size of the iterator buffer can + be configured with buffer_size. When the buffer is full, the read_rows_stream will pause the network stream until space is available Args: - query: contains details about which rows to return - - cache_size: the number of rows to cache in memory. If less than - or equal to 0, cache is unbounded. Defaults to 0 (unbounded) + - buffer_size: the number of rows to buffer in memory. If less than + or equal to 0, buffer is unbounded. 
Defaults to 0 (unbounded) - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. time is only counted while actively waiting on the network. - Completed and cached results can still be accessed after the deadline is complete, - with a DeadlineExceeded exception only raised after cached results are exhausted. + Completed and bufferd results can still be accessed after the deadline is complete, + with a DeadlineExceeded exception only raised after bufferd results are exhausted. If None, defaults to the Table's default_operation_timeout - per_row_timeout: the time budget for a single row read, in seconds. If a row takes longer than per_row_timeout to complete, the ongoing network request will be with a @@ -433,7 +433,7 @@ async def read_rows_stream( row_merger = _RowMerger( request, self.client._gapic_client, - cache_size=cache_size, + buffer_size=buffer_size, operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout, @@ -491,7 +491,7 @@ async def read_rows_sharded( query_list: list[ReadRowsQuery] | list[dict[str, Any]], *, limit: int | None, - cache_size_limit: int | None = None, + buffer_size: int | None = None, operation_timeout: int | float | None = 60, per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index a6b5889bf..7ccb416de 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -158,11 +158,11 @@ async def test_read_rows_query_matches_request(include_app_profile): @pytest.mark.parametrize( - "input_cache_size, expected_cache_size", + "input_buffer_size, expected_buffer_size", [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], ) @pytest.mark.asyncio -async def test_read_rows_cache_size(input_cache_size, expected_cache_size): +async def test_read_rows_buffer_size(input_buffer_size, expected_buffer_size): async with _make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() @@ -173,12 +173,12 @@ async def test_read_rows_cache_size(input_cache_size, expected_cache_size): queue.side_effect = asyncio.CancelledError try: gen = await table.read_rows_stream( - query, operation_timeout=3, cache_size=input_cache_size + query, operation_timeout=3, buffer_size=input_buffer_size ) [row async for row in gen] except asyncio.CancelledError: pass - queue.assert_called_once_with(maxsize=expected_cache_size) + queue.assert_called_once_with(maxsize=expected_buffer_size) @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) From 14d852709e1c0d4721483ef58462d5ddcd6ee84a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 16:01:37 -0700 Subject: [PATCH 266/349] renamed errors --- google/cloud/bigtable/_row_merger.py | 5 +++-- google/cloud/bigtable/iterators.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index ef20a0d5d..519eab927 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -108,7 +108,7 @@ def __init__( def on_error_fn(exc): if predicate(exc): - self.errors.append(exc) + self.transient_errors.append(exc) retry = retries.AsyncRetry( predicate=predicate, @@ -122,7 +122,8 @@ def on_error_fn(exc): self.stream: AsyncGenerator[Row | RequestStats, None] | None = retry( self.partial_retryable 
)() - self.errors: List[Exception] = [] + # contains the list of errors that were retried + self.transient_errors: List[Exception] = [] def __aiter__(self) -> AsyncIterator[Row | RequestStats]: """Implements the AsyncIterable interface""" diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index bc3bb4ffc..dffcbbba3 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -117,9 +117,10 @@ async def __anext__(self) -> Row: f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded" ) source_exc = None - if merger.errors: + if merger.transient_errors: source_exc = RetryExceptionGroup( - f"{len(merger.errors)} failed attempts", merger.errors + f"{len(merger.transient_errors)} failed attempts", + merger.transient_errors, ) new_exc.__cause__ = source_exc await self._finish_with_error(new_exc) From 4b89c864c21f750c68c4949734793666a175eb39 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 16:10:32 -0700 Subject: [PATCH 267/349] replaced type check with None check --- google/cloud/bigtable/_row_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 519eab927..296389206 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -131,7 +131,7 @@ def __aiter__(self) -> AsyncIterator[Row | RequestStats]: async def __anext__(self) -> Row | RequestStats: """Implements the AsyncIterator interface""" - if isinstance(self.stream, AsyncGenerator): + if self.stream is not None: return await self.stream.__anext__() else: raise asyncio.InvalidStateError("stream is closed") From 9f89577ab6cea3fb7a4047da3456fe33b156193b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 16:18:37 -0700 Subject: [PATCH 268/349] added comment for last_scanned_row heartbeat --- google/cloud/bigtable/_row_merger.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 296389206..e990e9bf4 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -334,6 +334,11 @@ class _StateMachine: If an unexpected chunk is received for the current state, the state machine will raise an InvalidChunk exception + + The server may send a heartbeat message indicating that it has + processed a particular row, to facilitate retries. This will be passed + to the state machine via handle_last_scanned_row, which emit a + _LastScannedRow marker to the stream. 
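A condensed sketch of how those markers are consumed downstream, assuming the merged stream yields only Row objects and _LastScannedRow markers (RequestStats handling is omitted, and _LastScannedRow is assumed to be importable from the row module):

    from google.cloud.bigtable.row import _LastScannedRow

    async def emit_rows(merged_stream, last_seen_row_key=None):
        async for item in merged_stream:
            if last_seen_row_key is None or item.row_key > last_seen_row_key:
                last_seen_row_key = item.row_key        # checkpoint used on retry
                if not isinstance(item, _LastScannedRow):
                    yield item                          # only real rows reach the caller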
""" __slots__ = ( From 4b229b97f8613198f4d6fe9dbbcd69648c4d9e50 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 16:23:24 -0700 Subject: [PATCH 269/349] added early return --- google/cloud/bigtable/_row_merger.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index e990e9bf4..00f62634a 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -406,9 +406,10 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: if chunk.reset_row: # reset row if requested self._handle_reset_chunk(chunk) - else: - # otherwise, process the chunk and update the state - self.current_state = self.current_state.handle_chunk(chunk) + return None + + # process the chunk and update the state + self.current_state = self.current_state.handle_chunk(chunk) if chunk.commit_row: # check if row is complete, and return it if so if not isinstance(self.current_state, AWAITING_NEW_CELL): From 152bccf24a6cf50bdd6278ad6d22e972bbb2d056 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 16:33:41 -0700 Subject: [PATCH 270/349] moved validation --- google/cloud/bigtable/_row_merger.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 00f62634a..be39f9bb8 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -516,6 +516,12 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # key or the row is the same if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: raise InvalidChunk("Row key changed mid row") + + if not self._owner.current_family: + raise InvalidChunk("Missing family for a new cell") + if self._owner.current_qualifier is None: + raise InvalidChunk("Missing qualifier for a new cell") + self._owner.adapter.start_cell( family=self._owner.current_family, qualifier=self._owner.current_qualifier, @@ -603,16 +609,12 @@ def start_row(self, key: bytes) -> None: def start_cell( self, - family: str | None, - qualifier: bytes | None, + family: str, + qualifier: bytes, timestamp_micros: int, labels: List[str], ) -> None: """called to start a new cell in a row.""" - if not family: - raise InvalidChunk("Missing family for a new cell") - if qualifier is None: - raise InvalidChunk("Missing qualifier for a new cell") if self.current_key is None: raise InvalidChunk("start_cell called without a row") self.working_value = bytearray() From 67c291111c0ee9d89cc73a9ea91beb1d852a0521 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 17 Apr 2023 17:00:06 -0700 Subject: [PATCH 271/349] added close call to ReadRowsIterator --- google/cloud/bigtable/iterators.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index dffcbbba3..97d842abe 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -141,3 +141,11 @@ async def _finish_with_error(self, e: Exception): if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() self._idle_timeout_task = None + + async def aclose(self): + """ + Support closing the stream with an explicit call to aclose() + """ + await self._finish_with_error( + StopAsyncIteration(f"{self.__class__.__name__} closed") + ) From ff11ad329b319a3ea5eb9497bcb26e5a16038da4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: 
Mon, 17 Apr 2023 17:17:24 -0700 Subject: [PATCH 272/349] removed del --- google/cloud/bigtable/_row_merger.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index be39f9bb8..de4c53b0a 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -140,7 +140,6 @@ async def aclose(self): """Close the stream and release resources""" if isinstance(self.stream, AsyncGenerator): await self.stream.aclose() - del self.stream self.stream = None self.last_seen_row_key = None @@ -215,7 +214,6 @@ async def retryable_merge_rows( ): if get_from_buffer_task.done(): new_item = get_from_buffer_task.result() - # don't yield rows that have already been emitted if isinstance(new_item, RequestStats): yield new_item # ignore rows that have already been emitted From 78bd5d3a56caafc635b9c46339eac7122c225fef Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 11:00:16 -0700 Subject: [PATCH 273/349] pull out buffer control logic --- google/cloud/bigtable/_row_merger.py | 75 +++++++++------------------- 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index de4c53b0a..7633499ae 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -27,7 +27,6 @@ from abc import ABC, abstractmethod from typing import ( - cast, List, Any, AsyncIterable, @@ -191,65 +190,39 @@ async def retryable_merge_rows( self.request, timeout=per_request_timeout, ) - buffer: asyncio.Queue[Row | RequestStats] = asyncio.Queue(maxsize=buffer_size) state_machine = _StateMachine() try: - stream_task = asyncio.create_task( - _RowMerger._generator_to_buffer( - buffer, - _RowMerger.merge_row_response_stream( - new_gapic_stream, state_machine - ), - ) + stream = _RowMerger.merge_row_response_stream( + new_gapic_stream, state_machine ) - get_from_buffer_task = asyncio.create_task(buffer.get()) - # sleep to allow other tasks to run - await asyncio.sleep(0) - # read from state machine and push into buffer - # when finished, stream will be done, buffer will be empty, but get_from_buffer_task will still be waiting - while ( - not stream_task.done() - or not buffer.empty() - or get_from_buffer_task.done() - ): - if get_from_buffer_task.done(): - new_item = get_from_buffer_task.result() - if isinstance(new_item, RequestStats): + # run until we get a timeout or the stream is exhausted + while True: + new_item = await asyncio.wait_for( + stream.__anext__(), timeout=per_row_timeout + ) + if isinstance(new_item, RequestStats): + yield new_item + # ignore rows that have already been emitted + elif isinstance(new_item, Row) and ( + self.last_seen_row_key is None + or new_item.row_key > self.last_seen_row_key + ): + self.last_seen_row_key = new_item.row_key + # don't yeild _LastScannedRow markers; they + # should only update last_seen_row_key + if not isinstance(new_item, _LastScannedRow): yield new_item - # ignore rows that have already been emitted - elif isinstance(new_item, Row) and ( - self.last_seen_row_key is None - or new_item.row_key > self.last_seen_row_key - ): - self.last_seen_row_key = new_item.row_key - # don't yeild _LastScannedRow markers; they - # should only update last_seen_row_key - if not isinstance(new_item, _LastScannedRow): - yield new_item - self.emit_count += 1 - if row_limit and self.emit_count >= row_limit: - return - # start new task for buffer - get_from_buffer_task = 
asyncio.create_task(buffer.get()) - await asyncio.sleep(0) - else: - # wait for either the stream to finish, or a new item to enter the buffer - first_finish = asyncio.wait( - [stream_task, get_from_buffer_task], - return_when=asyncio.FIRST_COMPLETED, - ) - await asyncio.wait_for(first_finish, per_row_timeout) - # stream and buffer are complete. if there's an exception, raise it - if stream_task.exception(): - raise cast(Exception, stream_task.exception()) + self.emit_count += 1 + if row_limit and self.emit_count >= row_limit: + return except asyncio.TimeoutError: # per_row_timeout from asyncio.wait_for raise core_exceptions.DeadlineExceeded( f"per_row_timeout of {per_row_timeout:0.1f}s exceeded" ) - finally: - stream_task.cancel() - get_from_buffer_task.cancel() + except StopAsyncIteration: + # end of stream + return @staticmethod def _revise_request_rowset( From ca4a16df9c20d753774255dae83aa94865657b87 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 11:22:20 -0700 Subject: [PATCH 274/349] got buffering working --- google/cloud/bigtable/_row_merger.py | 34 +++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 7633499ae..0b2e33080 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -149,8 +149,27 @@ async def _generator_to_buffer( """ Helper function to push items from an async generator into a buffer """ - async for item in input_generator: - await buffer.put(item) + try: + async for item in input_generator: + await buffer.put(item) + await buffer.put(StopAsyncIteration) + except Exception as e: + await buffer.put(e) + + @staticmethod + async def _buffer_to_generator( + buffer: asyncio.Queue[Any], + ) -> AsyncGenerator[Any, None]: + """ + Helper function to yield items from a buffer as an async generator + """ + while True: + item = await buffer.get() + if item is StopAsyncIteration: + return + if isinstance(item, Exception): + raise item + yield item async def retryable_merge_rows( self, @@ -190,10 +209,17 @@ async def retryable_merge_rows( self.request, timeout=per_request_timeout, ) + buffer: asyncio.Queue[Row | RequestStats | Exception] = asyncio.Queue( + maxsize=buffer_size + ) + buffer_task = asyncio.create_task( + self._generator_to_buffer(buffer, new_gapic_stream) + ) + buffered_stream = self._buffer_to_generator(buffer) state_machine = _StateMachine() try: stream = _RowMerger.merge_row_response_stream( - new_gapic_stream, state_machine + buffered_stream, state_machine ) # run until we get a timeout or the stream is exhausted while True: @@ -223,6 +249,8 @@ async def retryable_merge_rows( except StopAsyncIteration: # end of stream return + finally: + buffer_task.cancel() @staticmethod def _revise_request_rowset( From 0dba12165ffc44163eae5903fb258620486328d5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 11:36:36 -0700 Subject: [PATCH 275/349] check for full table scan revision --- google/cloud/bigtable/_row_merger.py | 4 ++++ tests/unit/test__row_merger.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index 0b2e33080..c5ec3a50a 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -284,6 +284,10 @@ def _revise_request_rowset( row_ranges[0]["start_key_open"] = last_seen_row_key if "start_key_closed" in row_ranges[0]: row_ranges[0].pop("start_key_closed") + 
# if our modifications result in an empty row_set, return the + # original row_set. This will avoid an unwanted full table scan + if len(row_keys) == 0 and len(row_ranges) == 0: + return row_set return {"row_keys": adjusted_keys, "row_ranges": row_ranges} @staticmethod diff --git a/tests/unit/test__row_merger.py b/tests/unit/test__row_merger.py index 968549308..21b6be365 100644 --- a/tests/unit/test__row_merger.py +++ b/tests/unit/test__row_merger.py @@ -19,6 +19,11 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + def test_revise_to_empty_rowset(self): + # ensure that the _revise_to_empty_set method + # does not return a full table scan + pass + class TestStateMachine(unittest.TestCase): @staticmethod From 35375664b6902f05e986bb63562de520f277d2bd Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 11:51:05 -0700 Subject: [PATCH 276/349] renamed and added underscores --- google/cloud/bigtable/_row_merger.py | 52 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index c5ec3a50a..f0c08bba1 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -84,15 +84,15 @@ def __init__( - per_row_timeout: the timeout to use when waiting for each individual row, in seconds - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds """ - self.last_seen_row_key: bytes | None = None - self.emit_count = 0 + self._last_seen_row_key: bytes | None = None + self._emit_count = 0 buffer_size = max(buffer_size, 0) - self.request = request + self._request = request self.operation_timeout = operation_timeout row_limit = request.get("rows_limit", 0) # lock in paramters for retryable wrapper - self.partial_retryable = partial( - self.retryable_merge_rows, + self._partial_retryable = partial( + self._read_rows_retryable_attempt, client.read_rows, buffer_size, per_row_timeout, @@ -118,8 +118,8 @@ def on_error_fn(exc): on_error=on_error_fn, is_generator=True, ) - self.stream: AsyncGenerator[Row | RequestStats, None] | None = retry( - self.partial_retryable + self._stream: AsyncGenerator[Row | RequestStats, None] | None = retry( + self._partial_retryable )() # contains the list of errors that were retried self.transient_errors: List[Exception] = [] @@ -130,17 +130,17 @@ def __aiter__(self) -> AsyncIterator[Row | RequestStats]: async def __anext__(self) -> Row | RequestStats: """Implements the AsyncIterator interface""" - if self.stream is not None: - return await self.stream.__anext__() + if self._stream is not None: + return await self._stream.__anext__() else: raise asyncio.InvalidStateError("stream is closed") async def aclose(self): """Close the stream and release resources""" - if isinstance(self.stream, AsyncGenerator): - await self.stream.aclose() - self.stream = None - self.last_seen_row_key = None + if isinstance(self._stream, AsyncGenerator): + await self._stream.aclose() + self._stream = None + self._last_seen_row_key = None @staticmethod async def _generator_to_buffer( @@ -171,7 +171,7 @@ async def _buffer_to_generator( raise item yield item - async def retryable_merge_rows( + async def _read_rows_retryable_attempt( self, gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], buffer_size: int, @@ -192,21 +192,21 @@ async def retryable_merge_rows( duplicate rows are not emitted - request is stored and (optionally) 
modified on each retry """ - if self.last_seen_row_key is not None: + if self._last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed - self.request["rows"] = _RowMerger._revise_request_rowset( - row_set=self.request.get("rows", None), - last_seen_row_key=self.last_seen_row_key, + self._request["rows"] = _RowMerger._revise_request_rowset( + row_set=self._request.get("rows", None), + last_seen_row_key=self._last_seen_row_key, ) # revise next request's row limit based on number emitted if row_limit: - new_limit = row_limit - self.emit_count + new_limit = row_limit - self._emit_count if new_limit <= 0: return else: - self.request["rows_limit"] = new_limit + self._request["rows_limit"] = new_limit new_gapic_stream = await gapic_fn( - self.request, + self._request, timeout=per_request_timeout, ) buffer: asyncio.Queue[Row | RequestStats | Exception] = asyncio.Queue( @@ -230,16 +230,16 @@ async def retryable_merge_rows( yield new_item # ignore rows that have already been emitted elif isinstance(new_item, Row) and ( - self.last_seen_row_key is None - or new_item.row_key > self.last_seen_row_key + self._last_seen_row_key is None + or new_item.row_key > self._last_seen_row_key ): - self.last_seen_row_key = new_item.row_key + self._last_seen_row_key = new_item.row_key # don't yeild _LastScannedRow markers; they # should only update last_seen_row_key if not isinstance(new_item, _LastScannedRow): yield new_item - self.emit_count += 1 - if row_limit and self.emit_count >= row_limit: + self._emit_count += 1 + if row_limit and self._emit_count >= row_limit: return except asyncio.TimeoutError: # per_row_timeout from asyncio.wait_for From 981f169b4d45ef30f95a5b7440065b49e6bd55fa Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 13:25:27 -0700 Subject: [PATCH 277/349] added extra check --- google/cloud/bigtable/_row_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index f0c08bba1..bd6d636fb 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -445,9 +445,9 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk received when not processing row") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") - if chunk.family_name.value: + if chunk.family_name and chunk.family_name.value: raise InvalidChunk("Reset chunk has family_name") - if chunk.qualifier.value: + if chunk.qualifier and chunk.qualifier.value: raise InvalidChunk("Reset chunk has qualifier") if chunk.timestamp_micros: raise InvalidChunk("Reset chunk has a timestamp") From d3d4c765932b1b6a39c39094af1da0968cfa4ad1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 13:41:34 -0700 Subject: [PATCH 278/349] removed unneeded validation --- google/cloud/bigtable/_row_merger.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_row_merger.py index bd6d636fb..dc7d27ae8 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_row_merger.py @@ -601,13 +601,6 @@ def reset(self) -> None: def start_row(self, key: bytes) -> None: """Called to start a new row. 
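For orientation, the state machine always drives the builder through the same call sequence; a minimal sketch with made-up values (the module is still named _row_merger.py at this point in the series):

    from google.cloud.bigtable._row_merger import _RowBuilder

    builder = _RowBuilder()
    builder.start_row(b"row-1")
    builder.start_cell("cf", b"qual", timestamp_micros=1000, labels=[])
    builder.cell_value(b"part-1")
    builder.cell_value(b"part-2")   # a value may arrive split across several chunks
    builder.finish_cell()           # commits the cell with value b"part-1part-2"
    row = builder.finish_row()      # returns the assembled Row for b"row-1"

The validation removed just below is presumably redundant under this contract, which matches the commit's description of it as unneeded.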
This will be called once per row""" - if ( - self.current_key is not None - or self.working_cell is not None - or self.working_value is not None - or self.completed_cells - ): - raise InvalidChunk("start_row called without finishing previous row") self.current_key = key def start_cell( From 19010948a93de1cfe92b53ce448bfd001bfec48c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 14:42:30 -0700 Subject: [PATCH 279/349] renamed RowMerger to ReadRowsOperation --- .../bigtable/{_row_merger.py => _read_rows.py} | 18 +++++++++--------- google/cloud/bigtable/client.py | 8 ++++---- google/cloud/bigtable/iterators.py | 12 ++++++------ ..._merger.py => test__read_rows_operation.py} | 10 +++++----- tests/unit/test_client_read_rows.py | 14 ++++++++------ tests/unit/test_read_rows_acceptance.py | 10 ++++++---- 6 files changed, 38 insertions(+), 34 deletions(-) rename google/cloud/bigtable/{_row_merger.py => _read_rows.py} (97%) rename tests/unit/{test__row_merger.py => test__read_rows_operation.py} (88%) diff --git a/google/cloud/bigtable/_row_merger.py b/google/cloud/bigtable/_read_rows.py similarity index 97% rename from google/cloud/bigtable/_row_merger.py rename to google/cloud/bigtable/_read_rows.py index dc7d27ae8..3dcf106c9 100644 --- a/google/cloud/bigtable/_row_merger.py +++ b/google/cloud/bigtable/_read_rows.py @@ -40,7 +40,7 @@ This module provides a set of classes for merging ReadRowsResponse chunks into Row objects. -- RowMerger is the highest level class, providing an interface for asynchronous +- ReadRowsOperation is the highest level class, providing an interface for asynchronous merging end-to-end - StateMachine is used internally to track the state of the merge, including the current row key and the keys of the rows that have been processed. @@ -52,16 +52,16 @@ """ -class _RowMerger(AsyncIterable[Row]): +class _ReadRowsOperation(AsyncIterable[Row]): """ - RowMerger handles the logic of merging chunks from a ReadRowsResponse stream + ReadRowsOperation handles the logic of merging chunks from a ReadRowsResponse stream into a stream of Row objects. - RowMerger.merge_row_response_stream takes in a stream of ReadRowsResponse + ReadRowsOperation.merge_row_response_stream takes in a stream of ReadRowsResponse and turns them into a stream of Row objects using an internal StateMachine. - RowMerger(request, client) handles row merging logic end-to-end, including + ReadRowsOperation(request, client) handles row merging logic end-to-end, including performing retries on stream errors. 
""" @@ -194,7 +194,7 @@ async def _read_rows_retryable_attempt( """ if self._last_seen_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed - self._request["rows"] = _RowMerger._revise_request_rowset( + self._request["rows"] = _ReadRowsOperation._revise_request_rowset( row_set=self._request.get("rows", None), last_seen_row_key=self._last_seen_row_key, ) @@ -218,7 +218,7 @@ async def _read_rows_retryable_attempt( buffered_stream = self._buffer_to_generator(buffer) state_machine = _StateMachine() try: - stream = _RowMerger.merge_row_response_stream( + stream = _ReadRowsOperation.merge_row_response_stream( buffered_stream, state_machine ) # run until we get a timeout or the stream is exhausted @@ -382,7 +382,7 @@ def is_terminal_state(self) -> bool: def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: """ - Called by RowMerger to notify the state machine of a scan heartbeat + Called by ReadRowsOperation to notify the state machine of a scan heartbeat Returns an empty row with the last_scanned_row_key """ @@ -396,7 +396,7 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: """ - Called by RowMerger to process a new chunk + Called by ReadRowsOperation to process a new chunk Returns a Row if the chunk completes a row, otherwise returns None """ diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4d1112aaa..81b728fb8 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -38,7 +38,7 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError -from google.cloud.bigtable._row_merger import _RowMerger +from google.cloud.bigtable._read_rows import _ReadRowsOperation import google.auth.credentials import google.auth._default @@ -427,10 +427,10 @@ async def read_rows_stream( # read_rows smart retries is implemented using a series of iterators: # - client.read_rows: outputs raw ReadRowsResponse objects from backend. Has per_request_timeout - # - RowMerger.merge_row_response_stream: parses chunks into rows - # - RowMerger.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout + # - ReadRowsOperation.merge_row_response_stream: parses chunks into rows + # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute - row_merger = _RowMerger( + row_merger = _ReadRowsOperation( request, self.client._gapic_client, buffer_size=buffer_size, diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 97d842abe..c47904efb 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -22,7 +22,7 @@ import time import sys -from google.cloud.bigtable._row_merger import _RowMerger +from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable_v2.types import RequestStats from google.api_core import exceptions as core_exceptions from google.cloud.bigtable.exceptions import RetryExceptionGroup @@ -35,8 +35,8 @@ class ReadRowsIterator(AsyncIterable[Row]): Async iterator for ReadRows responses. 
""" - def __init__(self, merger: _RowMerger): - self._merger_or_error: _RowMerger | Exception = merger + def __init__(self, merger: _ReadRowsOperation): + self._merger_or_error: _ReadRowsOperation | Exception = merger self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self._idle_timeout_task: asyncio.Task[None] | None = None @@ -64,7 +64,7 @@ def active(self): """ Returns True if the iterator is still active and has not been closed """ - return isinstance(self._merger_or_error, _RowMerger) + return isinstance(self._merger_or_error, _ReadRowsOperation) async def _idle_timeout_coroutine(self, idle_timeout: float): """ @@ -102,7 +102,7 @@ async def __anext__(self) -> Row: if isinstance(self._merger_or_error, Exception): raise self._merger_or_error else: - merger = cast(_RowMerger, self._merger_or_error) + merger = cast(_ReadRowsOperation, self._merger_or_error) try: self.last_interaction_time = time.time() next_item = await merger.__anext__() @@ -134,7 +134,7 @@ async def _finish_with_error(self, e: Exception): Helper function to close the stream and clean up resources after an error has occurred. """ - if isinstance(self._merger_or_error, _RowMerger): + if isinstance(self._merger_or_error, _ReadRowsOperation): await self._merger_or_error.aclose() del self._merger_or_error self._merger_or_error = e diff --git a/tests/unit/test__row_merger.py b/tests/unit/test__read_rows_operation.py similarity index 88% rename from tests/unit/test__row_merger.py rename to tests/unit/test__read_rows_operation.py index 21b6be365..de4c54e24 100644 --- a/tests/unit/test__row_merger.py +++ b/tests/unit/test__read_rows_operation.py @@ -9,12 +9,12 @@ TEST_LABELS = ["label1", "label2"] -class TestRowMerger(unittest.IsolatedAsyncioTestCase): +class TestReadRowsOperation(unittest.IsolatedAsyncioTestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable._row_merger import _RowMerger + from google.cloud.bigtable._read_rows import _ReadRowsOperation - return _RowMerger + return _ReadRowsOperation def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) @@ -28,7 +28,7 @@ def test_revise_to_empty_rowset(self): class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable._row_merger import _StateMachine + from google.cloud.bigtable._read_rows import _StateMachine return _StateMachine @@ -43,7 +43,7 @@ class TestState(unittest.TestCase): class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable._row_merger import _RowBuilder + from google.cloud.bigtable._read_rows import _RowBuilder return _RowBuilder diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 7ccb416de..e341adf37 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -183,7 +183,7 @@ async def test_read_rows_buffer_size(input_buffer_size, expected_buffer_size): @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) @pytest.mark.asyncio -async def test_read_rows_operation_timeout(operation_timeout): +async def test_read_rows_timeout(operation_timeout): async with _make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() @@ -299,7 +299,7 @@ async def test_read_rows_idle_timeout(): BigtableAsyncClient, ) from google.cloud.bigtable.exceptions import IdleTimeout - from google.cloud.bigtable._row_merger import _RowMerger + from google.cloud.bigtable._read_rows 
import _ReadRowsOperation chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: @@ -313,7 +313,7 @@ async def test_read_rows_idle_timeout(): gen = await table.read_rows_stream(query) # should start idle timer on creation start_idle_timer.assert_called_once() - with mock.patch.object(_RowMerger, "aclose", AsyncMock()) as aclose: + with mock.patch.object(_ReadRowsOperation, "aclose", AsyncMock()) as aclose: # start idle timer with our own value await gen._start_idle_timer(0.1) # should timeout after being abandoned @@ -425,10 +425,12 @@ async def test_read_rows_request_stats_missing(): @pytest.mark.asyncio async def test_read_rows_revise_request(): - from google.cloud.bigtable._row_merger import _RowMerger + from google.cloud.bigtable._read_rows import _ReadRowsOperation - with mock.patch.object(_RowMerger, "_revise_request_rowset") as revise_rowset: - with mock.patch.object(_RowMerger, "aclose"): + with mock.patch.object( + _ReadRowsOperation, "_revise_request_rowset" + ) as revise_rowset: + with mock.patch.object(_ReadRowsOperation, "aclose"): revise_rowset.side_effect = [ "modified", core_exceptions.Cancelled("mock error"), diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index 94b4e3829..b94548d9f 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -23,7 +23,7 @@ from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable._row_merger import _RowMerger, _StateMachine +from google.cloud.bigtable._read_rows import _ReadRowsOperation, _StateMachine from google.cloud.bigtable.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile @@ -67,7 +67,7 @@ async def _scenerio_stream(): try: state = _StateMachine() results = [] - async for row in _RowMerger.merge_row_response_stream( + async for row in _ReadRowsOperation.merge_row_response_stream( _scenerio_stream(), state ): for cell in row: @@ -136,7 +136,9 @@ async def _row_stream(): state = _StateMachine() state.last_seen_row_key = b"a" with pytest.raises(InvalidChunk): - async for _ in _RowMerger.merge_row_response_stream(_row_stream(), state): + async for _ in _ReadRowsOperation.merge_row_response_stream( + _row_stream(), state + ): pass @@ -293,6 +295,6 @@ async def _row_stream(): state = _StateMachine() results = [] - async for row in _RowMerger.merge_row_response_stream(_row_stream(), state): + async for row in _ReadRowsOperation.merge_row_response_stream(_row_stream(), state): results.append(row) return results From 947fe9b99b054755aaa525e32341ed1d82ecdd0b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 16:09:21 -0700 Subject: [PATCH 280/349] changed _read_rows test file name --- .../{test__read_rows_operation.py => test__read_rows.py} | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) rename tests/unit/{test__read_rows_operation.py => test__read_rows.py} (91%) diff --git a/tests/unit/test__read_rows_operation.py b/tests/unit/test__read_rows.py similarity index 91% rename from tests/unit/test__read_rows_operation.py rename to tests/unit/test__read_rows.py index de4c54e24..fa5d25bcd 100644 --- a/tests/unit/test__read_rows_operation.py +++ b/tests/unit/test__read_rows.py @@ -61,7 +61,11 @@ def test_ctor(self): self.assertEqual(row_builder.completed_cells, []) def test_start_row(self): - pass + row_builder = self._make_one() + 
row_builder.start_row(b"row_key") + self.assertEqual(row_builder.current_key, b"row_key") + row_builder.start_row(b"row_key2") + self.assertEqual(row_builder.current_key, b"row_key2") def test_start_cell(self): # test with no family From 773d4e5077d17c52f08ed378ab7d368370100b78 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 16:27:11 -0700 Subject: [PATCH 281/349] added row builder tests --- google/cloud/bigtable/_read_rows.py | 2 +- google/cloud/bigtable/row.py | 7 ++ tests/unit/test__read_rows.py | 101 +++++++++++++++++++++++++--- 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 3dcf106c9..468c92351 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -627,7 +627,7 @@ def cell_value(self, value: bytes) -> None: def finish_cell(self) -> None: """called once per cell to signal the end of the value (unless reset)""" if self.working_cell is None or self.working_value is None: - raise InvalidChunk("Cell value received before start_cell") + raise InvalidChunk("finish_cell called before start_cell") self.working_cell.value = bytes(self.working_value) self.completed_cells.append(self.working_cell) self.working_cell = None diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 791899c2b..2fe7cf58c 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -60,6 +60,13 @@ def __init__( self._cells_map[cell.family][cell.column_qualifier].append(cell) self._cells_list.append(cell) + @property + def cells(self) -> list[Cell]: + """ + Returns a list of all cells in the row + """ + return self.get_cells() + def get_cells( self, family: str | None = None, qualifier: str | bytes | None = None ) -> list[Cell]: diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index fa5d25bcd..7d6e65edb 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -79,24 +79,107 @@ def test_start_cell(self): TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS ) self.assertEqual(str(e.exception), "start_cell called without a row") - - def test_cell_value_no_cell(self): - pass + # test with valid row + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + self.assertEqual(row_builder.working_cell.family, TEST_FAMILY) + self.assertEqual(row_builder.working_cell.column_qualifier, TEST_QUALIFIER) + self.assertEqual(row_builder.working_cell.timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(row_builder.working_cell.labels, TEST_LABELS) + self.assertEqual(row_builder.working_value, b"") def test_cell_value(self): - pass + row_builder = self._make_one() + row_builder.start_row(b"row_key") + with self.assertRaises(InvalidChunk) as e: + # start_cell must be called before cell_value + row_builder.cell_value(b"cell_value") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value") + self.assertEqual(row_builder.working_value, b"cell_value") + # should be able to continuously append to the working value + row_builder.cell_value(b"appended") + self.assertEqual(row_builder.working_value, b"cell_valueappended") def test_finish_cell(self): - pass + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.finish_cell() + 
self.assertEqual(len(row_builder.completed_cells), 1) + self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) + self.assertEqual(row_builder.completed_cells[0].column_qualifier, TEST_QUALIFIER) + self.assertEqual(row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(row_builder.completed_cells[0].labels, TEST_LABELS) + self.assertEqual(row_builder.completed_cells[0].value, b"") + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + # add additional cell with value + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value") + row_builder.cell_value(b"appended") + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), 2) + self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) + self.assertEqual(row_builder.completed_cells[1].column_qualifier, TEST_QUALIFIER) + self.assertEqual(row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(row_builder.completed_cells[1].labels, TEST_LABELS) + self.assertEqual(row_builder.completed_cells[1].value, b"cell_valueappended") + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) def test_finish_cell_no_cell(self): - pass + with self.assertRaises(InvalidChunk) as e: + self._make_one().finish_cell() + self.assertEqual(str(e.exception), "finish_cell called before start_cell") + with self.assertRaises(InvalidChunk) as e: + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.finish_cell() + self.assertEqual(str(e.exception), "finish_cell called before start_cell") def test_finish_row(self): - pass + row_builder = self._make_one() + row_builder.start_row(b"row_key") + for i in range(3): + row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value: ") + row_builder.cell_value(str(i).encode("utf-8")) + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), i + 1) + output = row_builder.finish_row() + self.assertEqual(row_builder.current_key, None) + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + self.assertEqual(len(row_builder.completed_cells), 0) + + self.assertEqual(output.row_key, b"row_key") + self.assertEqual(len(output), 3) + for i in range(3): + self.assertEqual(output[i].family, str(i)) + self.assertEqual(output[i].column_qualifier, TEST_QUALIFIER) + self.assertEqual(output[i].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(output[i].labels, TEST_LABELS) + self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) + def finish_row_no_row(self): - pass + with self.assertRaises(InvalidChunk) as e: + self._make_one().finish_row() + self.assertEqual(str(e.exception), "No row in progress") def test_reset(self): - pass + row_builder = self._make_one() + row_builder.start_row(b"row_key") + for i in range(3): + row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value: ") + row_builder.cell_value(str(i).encode("utf-8")) + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), i + 1) + row_builder.reset() + self.assertEqual(row_builder.current_key, None) + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + self.assertEqual(len(row_builder.completed_cells), 0) From 
cbb0513b74074cb8f663c526d595f40174187b0d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 18:02:10 -0700 Subject: [PATCH 282/349] added revise_row tests --- google/cloud/bigtable/_read_rows.py | 24 ++++++++----- tests/unit/test__read_rows.py | 55 +++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 468c92351..b80fe2d38 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -265,7 +265,7 @@ def _revise_request_rowset( - last_seen_row_key: the last row key encountered """ # if user is doing a whole table scan, start a new one with the last seen key - if row_set is None: + if row_set is None or (len(row_set.get("row_ranges", [])) == 0 and len(row_set.get("row_keys", [])) == 0): last_seen = last_seen_row_key return { "row_keys": [], @@ -278,17 +278,25 @@ def _revise_request_rowset( for key in row_keys: if key > last_seen_row_key: adjusted_keys.append(key) - # if user specified only a single range, set start to the last seen key + # adjust ranges to ignore keys before last seen row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) - if len(row_keys) == 0 and len(row_ranges) == 1: - row_ranges[0]["start_key_open"] = last_seen_row_key - if "start_key_closed" in row_ranges[0]: - row_ranges[0].pop("start_key_closed") + adjusted_ranges = [] + for row_range in row_ranges: + end_key = row_range.get("end_key_closed", None) or row_range.get("end_key_open", None) + if end_key is None or end_key > last_seen_row_key: + # end range is after last seen key + new_range = row_range.copy() + start_key = row_range.get("start_key_closed", None) or row_range.get("start_key_open", None) + if start_key is None or start_key <= last_seen_row_key: + # replace start key with last seen + new_range["start_key_open"] = last_seen_row_key + new_range.pop("start_key_closed", None) + adjusted_ranges.append(new_range) # if our modifications result in an empty row_set, return the # original row_set. 
This will avoid an unwanted full table scan - if len(row_keys) == 0 and len(row_ranges) == 0: + if len(adjusted_keys) == 0 and len(adjusted_ranges) == 0: return row_set - return {"row_keys": adjusted_keys, "row_ranges": row_ranges} + return {"row_keys": adjusted_keys, "row_ranges": adjusted_ranges} @staticmethod async def merge_row_response_stream( diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 7d6e65edb..fdb8fbe2f 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -1,5 +1,6 @@ import unittest from unittest import mock +import pytest from google.cloud.bigtable.exceptions import InvalidChunk @@ -9,7 +10,7 @@ TEST_LABELS = ["label1", "label2"] -class TestReadRowsOperation(unittest.IsolatedAsyncioTestCase): +class TestReadRowsOperation(): @staticmethod def _get_target_class(): from google.cloud.bigtable._read_rows import _ReadRowsOperation @@ -19,10 +20,60 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + @pytest.mark.parametrize("in_keys,last_key,expected", [ + (["b", "c", "d"], "a", ["b", "c", "d"]), + (["a", "b", "c"], "b", ["c"]), + (["a", "b", "c"], "c", []), + (["a", "b", "c"], "d", []), + (["d", "c", "b", "a"], "b", ["d", "c"]), + ]) + def test_revise_request_rowset_keys(self, in_keys, last_key, expected): + sample_range = {"start_key_open": last_key} + row_set = {"row_keys": in_keys, "row_ranges": [sample_range]} + revised = self._get_target_class()._revise_request_rowset(row_set, last_key) + assert revised["row_keys"] == expected + assert revised["row_ranges"] == [sample_range] + + @pytest.mark.parametrize("in_ranges,last_key,expected", [ + ([{"start_key_open": "b", "end_key_closed": "d"}], "a", [{"start_key_open": "b", "end_key_closed": "d"}]), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "a", [{"start_key_closed": "b", "end_key_closed": "d"}]), + ([{"start_key_open": "a", "end_key_closed": "d"}], "b", [{"start_key_open": "b", "end_key_closed": "d"}]), + ([{"start_key_closed": "a", "end_key_open": "d"}], "b", [{"start_key_open": "b", "end_key_open": "d"}]), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "b", [{"start_key_open": "b", "end_key_closed": "d"}]), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "d", []), + ([{"start_key_closed": "b", "end_key_open": "d"}], "d", []), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "e", []), + ([{"start_key_closed": "b"}], "z", [{"start_key_open": "z"}]), + ([{"start_key_closed": "b"}], "a", [{"start_key_closed": "b"}]), + ([{"end_key_closed": "z"}], "a", [{"start_key_open": "a", "end_key_closed": "z"}]), + ([{"end_key_open": "z"}], "a", [{"start_key_open": "a", "end_key_open": "z"}]), + ]) + + def test_revise_request_rowset_ranges(self, in_ranges, last_key, expected): + next_key = last_key + "a" + row_set = {"row_keys": [next_key], "row_ranges": in_ranges} + revised = self._get_target_class()._revise_request_rowset(row_set, last_key) + assert revised["row_keys"] == [next_key] + assert revised["row_ranges"] == expected + + @pytest.mark.parametrize("last_key", ["a", "b", "c"]) + def test_revise_request_full_table(self, last_key): + row_set = {"row_keys": [], "row_ranges": []} + for selected_set in [row_set, None]: + revised = self._get_target_class()._revise_request_rowset(selected_set, last_key) + assert revised["row_keys"] == [] + assert len(revised["row_ranges"]) == 1 + assert revised["row_ranges"][0]["start_key_open"] == last_key + def test_revise_to_empty_rowset(self): # 
ensure that the _revise_to_empty_set method # does not return a full table scan - pass + row_keys = ["a", "b", "c"] + row_set = {"row_keys": row_keys, "row_ranges": [{"end_key_open": "c"}]} + revised = self._get_target_class()._revise_request_rowset(row_set, "d") + assert revised == row_set + assert len(revised["row_keys"]) == 3 + assert revised["row_keys"] == row_keys class TestStateMachine(unittest.TestCase): From 2bec69311dcae0b784d1e7984083b5b316b3850f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 18 Apr 2023 18:04:41 -0700 Subject: [PATCH 283/349] ran blacken --- google/cloud/bigtable/_read_rows.py | 13 +++-- tests/unit/test__read_rows.py | 83 ++++++++++++++++++++--------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index b80fe2d38..91e01f273 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -265,7 +265,10 @@ def _revise_request_rowset( - last_seen_row_key: the last row key encountered """ # if user is doing a whole table scan, start a new one with the last seen key - if row_set is None or (len(row_set.get("row_ranges", [])) == 0 and len(row_set.get("row_keys", [])) == 0): + if row_set is None or ( + len(row_set.get("row_ranges", [])) == 0 + and len(row_set.get("row_keys", [])) == 0 + ): last_seen = last_seen_row_key return { "row_keys": [], @@ -282,11 +285,15 @@ def _revise_request_rowset( row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) adjusted_ranges = [] for row_range in row_ranges: - end_key = row_range.get("end_key_closed", None) or row_range.get("end_key_open", None) + end_key = row_range.get("end_key_closed", None) or row_range.get( + "end_key_open", None + ) if end_key is None or end_key > last_seen_row_key: # end range is after last seen key new_range = row_range.copy() - start_key = row_range.get("start_key_closed", None) or row_range.get("start_key_open", None) + start_key = row_range.get( + "start_key_closed", None + ) or row_range.get("start_key_open", None) if start_key is None or start_key <= last_seen_row_key: # replace start key with last seen new_range["start_key_open"] = last_seen_row_key diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index fdb8fbe2f..2a50cd8c5 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -10,7 +10,7 @@ TEST_LABELS = ["label1", "label2"] -class TestReadRowsOperation(): +class TestReadRowsOperation: @staticmethod def _get_target_class(): from google.cloud.bigtable._read_rows import _ReadRowsOperation @@ -20,13 +20,16 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) - @pytest.mark.parametrize("in_keys,last_key,expected", [ - (["b", "c", "d"], "a", ["b", "c", "d"]), - (["a", "b", "c"], "b", ["c"]), - (["a", "b", "c"], "c", []), - (["a", "b", "c"], "d", []), - (["d", "c", "b", "a"], "b", ["d", "c"]), - ]) + @pytest.mark.parametrize( + "in_keys,last_key,expected", + [ + (["b", "c", "d"], "a", ["b", "c", "d"]), + (["a", "b", "c"], "b", ["c"]), + (["a", "b", "c"], "c", []), + (["a", "b", "c"], "d", []), + (["d", "c", "b", "a"], "b", ["d", "c"]), + ], + ) def test_revise_request_rowset_keys(self, in_keys, last_key, expected): sample_range = {"start_key_open": last_key} row_set = {"row_keys": in_keys, "row_ranges": [sample_range]} @@ -34,21 +37,51 @@ def test_revise_request_rowset_keys(self, in_keys, last_key, expected): assert revised["row_keys"] == expected assert 
revised["row_ranges"] == [sample_range] - @pytest.mark.parametrize("in_ranges,last_key,expected", [ - ([{"start_key_open": "b", "end_key_closed": "d"}], "a", [{"start_key_open": "b", "end_key_closed": "d"}]), - ([{"start_key_closed": "b", "end_key_closed": "d"}], "a", [{"start_key_closed": "b", "end_key_closed": "d"}]), - ([{"start_key_open": "a", "end_key_closed": "d"}], "b", [{"start_key_open": "b", "end_key_closed": "d"}]), - ([{"start_key_closed": "a", "end_key_open": "d"}], "b", [{"start_key_open": "b", "end_key_open": "d"}]), - ([{"start_key_closed": "b", "end_key_closed": "d"}], "b", [{"start_key_open": "b", "end_key_closed": "d"}]), - ([{"start_key_closed": "b", "end_key_closed": "d"}], "d", []), - ([{"start_key_closed": "b", "end_key_open": "d"}], "d", []), - ([{"start_key_closed": "b", "end_key_closed": "d"}], "e", []), - ([{"start_key_closed": "b"}], "z", [{"start_key_open": "z"}]), - ([{"start_key_closed": "b"}], "a", [{"start_key_closed": "b"}]), - ([{"end_key_closed": "z"}], "a", [{"start_key_open": "a", "end_key_closed": "z"}]), - ([{"end_key_open": "z"}], "a", [{"start_key_open": "a", "end_key_open": "z"}]), - ]) - + @pytest.mark.parametrize( + "in_ranges,last_key,expected", + [ + ( + [{"start_key_open": "b", "end_key_closed": "d"}], + "a", + [{"start_key_open": "b", "end_key_closed": "d"}], + ), + ( + [{"start_key_closed": "b", "end_key_closed": "d"}], + "a", + [{"start_key_closed": "b", "end_key_closed": "d"}], + ), + ( + [{"start_key_open": "a", "end_key_closed": "d"}], + "b", + [{"start_key_open": "b", "end_key_closed": "d"}], + ), + ( + [{"start_key_closed": "a", "end_key_open": "d"}], + "b", + [{"start_key_open": "b", "end_key_open": "d"}], + ), + ( + [{"start_key_closed": "b", "end_key_closed": "d"}], + "b", + [{"start_key_open": "b", "end_key_closed": "d"}], + ), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "d", []), + ([{"start_key_closed": "b", "end_key_open": "d"}], "d", []), + ([{"start_key_closed": "b", "end_key_closed": "d"}], "e", []), + ([{"start_key_closed": "b"}], "z", [{"start_key_open": "z"}]), + ([{"start_key_closed": "b"}], "a", [{"start_key_closed": "b"}]), + ( + [{"end_key_closed": "z"}], + "a", + [{"start_key_open": "a", "end_key_closed": "z"}], + ), + ( + [{"end_key_open": "z"}], + "a", + [{"start_key_open": "a", "end_key_open": "z"}], + ), + ], + ) def test_revise_request_rowset_ranges(self, in_ranges, last_key, expected): next_key = last_key + "a" row_set = {"row_keys": [next_key], "row_ranges": in_ranges} @@ -60,7 +93,9 @@ def test_revise_request_rowset_ranges(self, in_ranges, last_key, expected): def test_revise_request_full_table(self, last_key): row_set = {"row_keys": [], "row_ranges": []} for selected_set in [row_set, None]: - revised = self._get_target_class()._revise_request_rowset(selected_set, last_key) + revised = self._get_target_class()._revise_request_rowset( + selected_set, last_key + ) assert revised["row_keys"] == [] assert len(revised["row_ranges"]) == 1 assert revised["row_ranges"][0]["start_key_open"] == last_key From 5cd8e006e8be4157dd47f2d9975f1e7997a34930 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 11:11:12 -0700 Subject: [PATCH 284/349] added constructor tests --- tests/unit/test__read_rows.py | 79 ++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 2a50cd8c5..aeafe7893 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -20,6 +20,61 @@ def 
_get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + def test_ctor_defaults(self): + from types import AsyncGeneratorType + + request = {} + client = mock.Mock() + client.read_rows = mock.Mock() + client.read_rows.return_value = None + instance = self._make_one(request, client) + assert instance.transient_errors == [] + assert instance._last_seen_row_key is None + assert instance._emit_count == 0 + assert isinstance(instance._stream, AsyncGeneratorType) + retryable_fn = instance._partial_retryable + assert retryable_fn.func == instance._read_rows_retryable_attempt + assert retryable_fn.args[0] == client.read_rows + assert retryable_fn.args[1] == 0 + assert retryable_fn.args[2] == None + assert retryable_fn.args[3] == None + assert retryable_fn.args[4] == 0 + assert client.read_rows.call_count == 0 + + def test_ctor(self): + from types import AsyncGeneratorType + + row_limit = 91 + request = {"rows_limit": row_limit} + client = mock.Mock() + client.read_rows = mock.Mock() + client.read_rows.return_value = None + expected_buffer_size = 21 + expected_operation_timeout = 42 + expected_row_timeout = 43 + expected_request_timeout = 44 + instance = self._make_one( + request, + client, + buffer_size=expected_buffer_size, + operation_timeout=expected_operation_timeout, + per_row_timeout=expected_row_timeout, + per_request_timeout=expected_request_timeout, + ) + assert instance.transient_errors == [] + assert instance._last_seen_row_key is None + assert instance._emit_count == 0 + assert instance.operation_timeout == expected_operation_timeout + assert isinstance(instance._stream, AsyncGeneratorType) + retryable_fn = instance._partial_retryable + assert retryable_fn.func == instance._read_rows_retryable_attempt + assert retryable_fn.args[0] == client.read_rows + assert retryable_fn.args[1] == expected_buffer_size + assert retryable_fn.args[2] == expected_row_timeout + assert retryable_fn.args[3] == expected_request_timeout + assert retryable_fn.args[4] == row_limit + assert client.read_rows.call_count == 0 + @pytest.mark.parametrize( "in_keys,last_key,expected", [ @@ -121,6 +176,11 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + def test_ctor(self): + # ensure that the _StateMachine constructor + # sets the initial state + pass + class TestState(unittest.TestCase): pass @@ -178,7 +238,7 @@ def test_start_cell(self): def test_cell_value(self): row_builder = self._make_one() row_builder.start_row(b"row_key") - with self.assertRaises(InvalidChunk) as e: + with self.assertRaises(InvalidChunk): # start_cell must be called before cell_value row_builder.cell_value(b"cell_value") row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) @@ -195,8 +255,12 @@ def test_finish_cell(self): row_builder.finish_cell() self.assertEqual(len(row_builder.completed_cells), 1) self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) - self.assertEqual(row_builder.completed_cells[0].column_qualifier, TEST_QUALIFIER) - self.assertEqual(row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual( + row_builder.completed_cells[0].column_qualifier, TEST_QUALIFIER + ) + self.assertEqual( + row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP + ) self.assertEqual(row_builder.completed_cells[0].labels, TEST_LABELS) self.assertEqual(row_builder.completed_cells[0].value, b"") self.assertEqual(row_builder.working_cell, None) @@ -208,8 
+272,12 @@ def test_finish_cell(self): row_builder.finish_cell() self.assertEqual(len(row_builder.completed_cells), 2) self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) - self.assertEqual(row_builder.completed_cells[1].column_qualifier, TEST_QUALIFIER) - self.assertEqual(row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual( + row_builder.completed_cells[1].column_qualifier, TEST_QUALIFIER + ) + self.assertEqual( + row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP + ) self.assertEqual(row_builder.completed_cells[1].labels, TEST_LABELS) self.assertEqual(row_builder.completed_cells[1].value, b"cell_valueappended") self.assertEqual(row_builder.working_cell, None) @@ -249,7 +317,6 @@ def test_finish_row(self): self.assertEqual(output[i].labels, TEST_LABELS) self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) - def finish_row_no_row(self): with self.assertRaises(InvalidChunk) as e: self._make_one().finish_row() From d6f3ae1392da769bb9291df956ea9e18eea6eb0a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 11:25:31 -0700 Subject: [PATCH 285/349] upgraded submodule --- google/cloud/bigtable/_read_rows.py | 2 +- python-api-core | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 91e01f273..9417ad068 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -116,7 +116,7 @@ def on_error_fn(exc): multiplier=2, maximum=60, on_error=on_error_fn, - is_generator=True, + is_stream=True, ) self._stream: AsyncGenerator[Row | RequestStats, None] | None = retry( self._partial_retryable diff --git a/python-api-core b/python-api-core index 6cb3e2dc6..74f3f3ef7 160000 --- a/python-api-core +++ b/python-api-core @@ -1 +1 @@ -Subproject commit 6cb3e2dc6edac2b4b4c22496a3b507ceed3c5a24 +Subproject commit 74f3f3ef7a9325a3d6f605eb5359928481982223 From f2d7e714812f4ec1a357f342564385f5ea85b259 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 12:33:52 -0700 Subject: [PATCH 286/349] added tests --- google/cloud/bigtable/_read_rows.py | 11 +- tests/unit/test__read_rows.py | 162 ++++++++++++++++++++++++++-- 2 files changed, 158 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 9417ad068..4b4c7fba9 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -137,7 +137,7 @@ async def __anext__(self) -> Row | RequestStats: async def aclose(self): """Close the stream and release resources""" - if isinstance(self._stream, AsyncGenerator): + if self._stream is not None: await self._stream.aclose() self._stream = None self._last_seen_row_key = None @@ -199,8 +199,8 @@ async def _read_rows_retryable_attempt( last_seen_row_key=self._last_seen_row_key, ) # revise next request's row limit based on number emitted - if row_limit: - new_limit = row_limit - self._emit_count + if total_row_limit: + new_limit = total_row_limit - self._emit_count if new_limit <= 0: return else: @@ -239,7 +239,7 @@ async def _read_rows_retryable_attempt( if not isinstance(new_item, _LastScannedRow): yield new_item self._emit_count += 1 - if row_limit and self._emit_count >= row_limit: + if total_row_limit and self._emit_count >= total_row_limit: return except asyncio.TimeoutError: # per_row_timeout from asyncio.wait_for @@ -381,9 +381,6 @@ def _reset_row(self) -> None: self.current_state: _State = 
AWAITING_NEW_ROW(self) self.current_family: str | None = None self.current_qualifier: bytes | None = None - # self.expected_cell_size:int = 0 - # self.remaining_cell_bytes:int = 0 - # self.num_cells_in_row:int = 0 self.adapter.reset() def is_terminal_state(self) -> bool: diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index aeafe7893..51575e55e 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -11,6 +11,12 @@ class TestReadRowsOperation: + """ + Tests helper functions in the ReadRowsOperation class + in-depth merging logic in merge_row_response_stream and _read_rows_retryable_attempt + is tested in test_read_rows_acceptance test_client_read_rows, and conformance tests + """ + @staticmethod def _get_target_class(): from google.cloud.bigtable._read_rows import _ReadRowsOperation @@ -21,8 +27,6 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor_defaults(self): - from types import AsyncGeneratorType - request = {} client = mock.Mock() client.read_rows = mock.Mock() @@ -31,19 +35,16 @@ def test_ctor_defaults(self): assert instance.transient_errors == [] assert instance._last_seen_row_key is None assert instance._emit_count == 0 - assert isinstance(instance._stream, AsyncGeneratorType) retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows assert retryable_fn.args[1] == 0 - assert retryable_fn.args[2] == None - assert retryable_fn.args[3] == None + assert retryable_fn.args[2] is None + assert retryable_fn.args[3] is None assert retryable_fn.args[4] == 0 assert client.read_rows.call_count == 0 def test_ctor(self): - from types import AsyncGeneratorType - row_limit = 91 request = {"rows_limit": row_limit} client = mock.Mock() @@ -65,7 +66,6 @@ def test_ctor(self): assert instance._last_seen_row_key is None assert instance._emit_count == 0 assert instance.operation_timeout == expected_operation_timeout - assert isinstance(instance._stream, AsyncGeneratorType) retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows @@ -75,6 +75,22 @@ def test_ctor(self): assert retryable_fn.args[4] == row_limit assert client.read_rows.call_count == 0 + @pytest.mark.asyncio + async def test_transient_error_capture(self): + from google.api_core import exceptions as core_exceptions + + client = mock.Mock() + client.read_rows = mock.Mock() + test_exc = core_exceptions.Aborted("test") + test_exc2 = core_exceptions.DeadlineExceeded("test") + client.read_rows.side_effect = [test_exc, test_exc2] + instance = self._make_one({}, client) + with pytest.raises(RuntimeError): + await instance.__anext__() + assert len(instance.transient_errors) == 2 + assert instance.transient_errors[0] == test_exc + assert instance.transient_errors[1] == test_exc2 + @pytest.mark.parametrize( "in_keys,last_key,expected", [ @@ -165,6 +181,136 @@ def test_revise_to_empty_rowset(self): assert len(revised["row_keys"]) == 3 assert revised["row_keys"] == row_keys + @pytest.mark.parametrize( + "start_limit,emit_num,expected_limit", + [ + (10, 0, 10), + (10, 1, 9), + (10, 10, 0), + (0, 10, 0), + (0, 0, 0), + (4, 2, 2), + (3, 9, 0), + ], + ) + @pytest.mark.asyncio + async def test_revise_limit(self, start_limit, emit_num, expected_limit): + request = {"rows_limit": start_limit} + instance = self._make_one(request, mock.Mock()) + 
instance._emit_count = emit_num + instance._last_seen_row_key = "a" + gapic_mock = mock.Mock() + gapic_mock.side_effect = [RuntimeError("stop_fn")] + attempt = instance._read_rows_retryable_attempt( + gapic_mock, 0, None, None, start_limit + ) + if start_limit != 0 and expected_limit == 0: + # if we emitted the expected number of rows, we should receive a StopAsyncIteration + with pytest.raises(StopAsyncIteration): + await attempt.__anext__() + else: + with pytest.raises(RuntimeError): + await attempt.__anext__() + assert request["rows_limit"] == expected_limit + + @pytest.mark.asyncio + async def test__generator_to_buffer(self): + import asyncio + + async def test_generator(n): + for i in range(n): + yield i + + out_buffer = asyncio.Queue() + await self._get_target_class()._generator_to_buffer( + out_buffer, test_generator(10) + ) + assert out_buffer.qsize() == 11 + for i in range(10): + assert out_buffer.get_nowait() == i + assert out_buffer.get_nowait() == StopAsyncIteration + assert out_buffer.empty() + + @pytest.mark.asyncio + async def test__generator_to_buffer_with_error(self): + import asyncio + + async def test_generator(n, error_at=2): + for i in range(n): + if i == error_at: + raise ValueError("test error") + else: + yield i + + out_buffer = asyncio.Queue() + await self._get_target_class()._generator_to_buffer( + out_buffer, test_generator(10, error_at=4) + ) + assert out_buffer.qsize() == 5 + for i in range(4): + assert out_buffer.get_nowait() == i + assert isinstance(out_buffer.get_nowait(), ValueError) + assert out_buffer.empty() + + @pytest.mark.asyncio + async def test__buffer_to_generator(self): + import asyncio + + buffer = asyncio.Queue() + for i in range(10): + buffer.put_nowait(i) + buffer.put_nowait(StopAsyncIteration) + gen = self._get_target_class()._buffer_to_generator(buffer) + for i in range(10): + assert await gen.__anext__() == i + with pytest.raises(StopAsyncIteration): + await gen.__anext__() + + @pytest.mark.asyncio + async def test__buffer_to_generator_with_error(self): + import asyncio + + buffer = asyncio.Queue() + for i in range(4): + buffer.put_nowait(i) + test_error = ValueError("test error") + buffer.put_nowait(test_error) + gen = self._get_target_class()._buffer_to_generator(buffer) + for i in range(4): + assert await gen.__anext__() == i + with pytest.raises(ValueError) as e: + await gen.__anext__() + assert e.value == test_error + + @pytest.mark.asyncio + async def test_generator_to_buffer_to_generator(self): + import asyncio + + async def test_generator(): + for i in range(10): + yield i + + buffer = asyncio.Queue() + await self._get_target_class()._generator_to_buffer(buffer, test_generator()) + out_gen = self._get_target_class()._buffer_to_generator(buffer) + + out_expected = [i async for i in test_generator()] + out_actual = [i async for i in out_gen] + assert out_expected == out_actual + + @pytest.mark.asyncio + async def test_aclose(self): + import asyncio + + instance = self._make_one({}, mock.Mock()) + await instance.aclose() + assert instance._stream is None + assert instance._last_seen_row_key is None + with pytest.raises(asyncio.InvalidStateError): + await instance.__anext__() + # try calling a second time + await instance.aclose() + class TestStateMachine(unittest.TestCase): @staticmethod From cb23d3208143ba5328cbb85ace833035102237d3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 21:49:59 +0000 Subject: [PATCH 287/349] update docstring Co-authored-by: Jack Wotherspoon --- google/cloud/bigtable/client.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d2c74264f..a40c16f57 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -63,7 +63,7 @@ def __init__( """ Create a client instance for the Bigtable Data API - Client must be created within an async run loop context + Client should be created within an async context (running event loop) Args: project: the project which the client acts on behalf of. From bc31ab805b4ad4fb9d94553827d2ba9b6b6b8edb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 21:50:22 +0000 Subject: [PATCH 288/349] update docstring Co-authored-by: Jack Wotherspoon --- google/cloud/bigtable/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a40c16f57..c688dfba8 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -80,7 +80,7 @@ def __init__( Client options used to set user options on the client. API Endpoint should be set through client_options. Raises: - - RuntimeError if called outside of an async run loop context + - RuntimeError if called outside of an async context (no running event loop) - ValueError if pool_size is less than 1 """ # set up transport in registry From f54dfde313da7198f8d09c5d26fbbe002a362ec7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 21:50:47 +0000 Subject: [PATCH 289/349] fix typo Co-authored-by: Jack Wotherspoon --- google/cloud/bigtable/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c688dfba8..87339f1ab 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -167,7 +167,7 @@ async def _ping_and_warm_instances( Args: channel: grpc channel to ping Returns: - - squence of results or exceptions from the ping requests + - sequence of results or exceptions from the ping requests """ ping_rpc = channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarmChannel" From 46cfc49729719c80c46c1280fafa8a6ee1f00103 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 15:05:56 -0700 Subject: [PATCH 290/349] docstring improvements --- google/cloud/bigtable/client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 87339f1ab..038cecaba 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -273,7 +273,7 @@ def get_table( Returns a table instance for making data API requests Args: - instance_id: The Bigtable instance ID to associate with this client + instance_id: The Bigtable instance ID to associate with this client. instance_id is combined with the client's project to fully specify the instance table_id: The ID of the table. @@ -309,23 +309,23 @@ def __init__( """ Initialize a Table instance - Must be created within an async run loop context + Must be created within an async context (running event loop) Args: - instance_id: The Bigtable instance ID to associate with this client + instance_id: The Bigtable instance ID to associate with this client. instance_id is combined with the client's project to fully specify the instance table_id: The ID of the table. app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles Raises: - - RuntimeError if called outside of an async run loop context + - RuntimeError if called outside of an async context (no running event loop) """ self.client = client self.instance = instance_id self.table_id = table_id self.app_profile_id = app_profile_id - # raises RuntimeError if called outside of an async run loop context + # raises RuntimeError if called outside of an async context (no running event loop) try: self._register_instance_task = asyncio.create_task( self.client.register_instance(instance_id) From 573bbd12e0c4c12ae8e5dd46c5df48a1c3b7e924 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 15:21:02 -0700 Subject: [PATCH 291/349] made creating table outside loop into error --- google/cloud/bigtable/client.py | 17 +++++++--------- tests/unit/test_client.py | 36 +++++++++++++++------------------ 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 038cecaba..0f74bc6d0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -224,7 +224,7 @@ async def _manage_channel( next_refresh = random.uniform(refresh_interval_min, refresh_interval_max) next_sleep = next_refresh - (time.time() - start_timestamp) - async def register_instance(self, instance_id: str): + async def _register_instance(self, instance_id: str): """ Registers an instance with the client, and warms the channel pool for the instance @@ -244,7 +244,7 @@ async def register_instance(self, instance_id: str): # refresh tasks aren't active. start them as background tasks self.start_background_channel_refresh() - async def remove_instance_registration(self, instance_id: str) -> bool: + async def _remove_instance_registration(self, instance_id: str) -> bool: """ Removes an instance from the client's registered instances, to prevent warming new channels for the instance @@ -328,15 +328,12 @@ def __init__( # raises RuntimeError if called outside of an async context (no running event loop) try: self._register_instance_task = asyncio.create_task( - self.client.register_instance(instance_id) - ) - except RuntimeError: - warnings.warn( - "Table should be created in an asyncio event loop." - " Instance will not be registered with client for refresh", - RuntimeWarning, - stacklevel=2, + self.client._register_instance(instance_id) ) + except RuntimeError as e: + raise RuntimeError( + f"{self.__class__.__name__} must be created within an async event loop context." 
+ ) from e async def read_rows_stream( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 78495d9e3..14107bc8a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -504,7 +504,7 @@ async def test__manage_channel_refresh(num_cycles): @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test_register_instance(): +async def test__register_instance(): # create the client without calling start_background_channel_refresh with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: get_event_loop.side_effect = RuntimeError("no event loop") @@ -512,7 +512,7 @@ async def test_register_instance(): assert not client._channel_refresh_tasks # first call should start background refresh assert client._active_instances == set() - await client.register_instance("instance-1") + await client._register_instance("instance-1") assert len(client._active_instances) == 1 assert client._active_instances == {"projects/project-id/instances/instance-1"} assert client._channel_refresh_tasks @@ -520,7 +520,7 @@ async def test_register_instance(): with mock.patch.object( type(_make_one()), "start_background_channel_refresh" ) as refresh_mock: - await client.register_instance("instance-2") + await client._register_instance("instance-2") assert len(client._active_instances) == 2 assert client._active_instances == { "projects/project-id/instances/instance-1", @@ -531,7 +531,7 @@ async def test_register_instance(): @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test_register_instance_ping_and_warm(): +async def test__register_instance_ping_and_warm(): # should ping and warm each new instance pool_size = 7 with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: @@ -539,34 +539,34 @@ async def test_register_instance_ping_and_warm(): client = _make_one(project="project-id", pool_size=pool_size) # first call should start background refresh assert not client._channel_refresh_tasks - await client.register_instance("instance-1") + await client._register_instance("instance-1") client = _make_one(project="project-id", pool_size=pool_size) assert len(client._channel_refresh_tasks) == pool_size assert not client._active_instances # next calls should trigger ping and warm with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: # new instance should trigger ping and warm - await client.register_instance("instance-2") + await client._register_instance("instance-2") assert ping_mock.call_count == pool_size - await client.register_instance("instance-3") + await client._register_instance("instance-3") assert ping_mock.call_count == pool_size * 2 # duplcate instances should not trigger ping and warm - await client.register_instance("instance-3") + await client._register_instance("instance-3") assert ping_mock.call_count == pool_size * 2 await client.close() @pytest.mark.asyncio -async def test_remove_instance_registration(): +async def test__remove_instance_registration(): client = _make_one(project="project-id") - await client.register_instance("instance-1") - await client.register_instance("instance-2") + await client._register_instance("instance-1") + await client._register_instance("instance-2") assert len(client._active_instances) == 2 - success = await client.remove_instance_registration("instance-1") + success = await client._remove_instance_registration("instance-1") assert success assert len(client._active_instances) == 1 assert client._active_instances == 
{"projects/project-id/instances/instance-2"} - success = await client.remove_instance_registration("nonexistant") + success = await client._remove_instance_registration("nonexistant") assert not success assert len(client._active_instances) == 1 await client.close() @@ -730,10 +730,6 @@ def test_table_ctor_sync(): from google.cloud.bigtable.client import Table client = mock.Mock() - with pytest.warns(RuntimeWarning) as warnings: - table = Table(client, "instance-id", "table-id") - assert "event loop" in str(warnings[0].message) - assert table.table_id == "table-id" - assert table.instance == "instance-id" - assert table.app_profile_id is None - assert table.client is client + with pytest.raises(RuntimeError) as e: + Table(client, "instance-id", "table-id") + assert e.match("Table must be created within an async event loop context.") From 4f2657d18bed51286cbdd098495344e36da59de1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 16:11:00 -0700 Subject: [PATCH 292/349] make tables own active instances, and remove instances when tables close --- google/cloud/bigtable/client.py | 51 +++++++++++++++-- tests/unit/test_client.py | 99 +++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 0f74bc6d0..d62319d38 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -114,6 +114,7 @@ def __init__( ) # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() + self._instance_owners: dict[str, Set[Table]] = {} # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] @@ -224,15 +225,22 @@ async def _manage_channel( next_refresh = random.uniform(refresh_interval_min, refresh_interval_max) next_sleep = next_refresh - (time.time() - start_timestamp) - async def _register_instance(self, instance_id: str): + async def _register_instance(self, instance_id: str, owner: Table) -> None: """ Registers an instance with the client, and warms the channel pool for the instance The client will periodically refresh grpc channel pool used to make requests, and new channels will be warmed for each registered instance Channels will not be refreshed unless at least one instance is registered + + Args: + - instance_id: id of the instance to register. + - owner: table that owns the instance. Owners will be tracked in + _instance_owners, and instances will only be unregistered when all + owners call _remove_instance_registration """ instance_name = self._gapic_client.instance_path(self.project, instance_id) + self._instance_owners.setdefault(instance_name, set()).add(owner) if instance_name not in self._active_instances: self._active_instances.add(instance_name) if self._channel_refresh_tasks: @@ -244,21 +252,27 @@ async def _register_instance(self, instance_id: str): # refresh tasks aren't active. 
start them as background tasks self.start_background_channel_refresh() - async def _remove_instance_registration(self, instance_id: str) -> bool: + async def _remove_instance_registration(self, instance_id: str, owner:Table) -> bool: """ Removes an instance from the client's registered instances, to prevent warming new channels for the instance - If instance_id is not registered, returns False + If instance_id is not registered, or is still in use by other tables, returns False Args: - instance_id: id of the instance to remove + - instance_id: id of the instance to remove + - owner: table that owns the instance. Owners will be tracked in + _instance_owners, and instances will only be unregistered when all + owners call _remove_instance_registration Returns: - True if instance was removed """ instance_name = self._gapic_client.instance_path(self.project, instance_id) + owner_list = self._instance_owners.get(instance_name, set()) try: - self._active_instances.remove(instance_name) + owner_list.remove(owner) + if len(owner_list) == 0: + self._active_instances.remove(instance_name) return True except KeyError: return False @@ -328,7 +342,7 @@ def __init__( # raises RuntimeError if called outside of an async context (no running event loop) try: self._register_instance_task = asyncio.create_task( - self.client._register_instance(instance_id) + self.client._register_instance(instance_id, self) ) except RuntimeError as e: raise RuntimeError( @@ -658,3 +672,28 @@ async def read_modify_write_row( - GoogleAPIError exceptions from grpc call """ raise NotImplementedError + + async def close(self): + """ + Called to close the Table instance and release any resources held by it. + """ + await self.client._remove_instance_registration(self.instance, self) + + async def __aenter__(self): + """ + Implement async context manager protocol + + Register this instance with the client, so that + grpc channels will be warmed for the specified instance + """ + await self.client._register_instance(self.instance, self) + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """ + Implement async context manager protocol + + Unregister this instance with the client, so that + grpc channels will no longer be warmed + """ + await self.close() diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 14107bc8a..2b0ec3f68 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -512,7 +512,7 @@ async def test__register_instance(): assert not client._channel_refresh_tasks # first call should start background refresh assert client._active_instances == set() - await client._register_instance("instance-1") + await client._register_instance("instance-1", mock.Mock()) assert len(client._active_instances) == 1 assert client._active_instances == {"projects/project-id/instances/instance-1"} assert client._channel_refresh_tasks @@ -520,7 +520,7 @@ async def test__register_instance(): with mock.patch.object( type(_make_one()), "start_background_channel_refresh" ) as refresh_mock: - await client._register_instance("instance-2") + await client._register_instance("instance-2", mock.Mock()) assert len(client._active_instances) == 2 assert client._active_instances == { "projects/project-id/instances/instance-1", @@ -539,19 +539,19 @@ async def test__register_instance_ping_and_warm(): client = _make_one(project="project-id", pool_size=pool_size) # first call should start background refresh assert not client._channel_refresh_tasks - await client._register_instance("instance-1") + await 
client._register_instance("instance-1", mock.Mock()) client = _make_one(project="project-id", pool_size=pool_size) assert len(client._channel_refresh_tasks) == pool_size assert not client._active_instances # next calls should trigger ping and warm with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: # new instance should trigger ping and warm - await client._register_instance("instance-2") + await client._register_instance("instance-2", mock.Mock()) assert ping_mock.call_count == pool_size - await client._register_instance("instance-3") + await client._register_instance("instance-3", mock.Mock()) assert ping_mock.call_count == pool_size * 2 # duplcate instances should not trigger ping and warm - await client._register_instance("instance-3") + await client._register_instance("instance-3", mock.Mock()) assert ping_mock.call_count == pool_size * 2 await client.close() @@ -559,18 +559,72 @@ async def test__register_instance_ping_and_warm(): @pytest.mark.asyncio async def test__remove_instance_registration(): client = _make_one(project="project-id") - await client._register_instance("instance-1") - await client._register_instance("instance-2") + table = mock.Mock() + await client._register_instance("instance-1", table) + await client._register_instance("instance-2", table) assert len(client._active_instances) == 2 - success = await client._remove_instance_registration("instance-1") + assert len(client._instance_owners.keys()) == 2 + instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") + instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") + assert len(client._instance_owners[instance_1_path]) == 1 + assert list(client._instance_owners[instance_1_path])[0] is table + assert len(client._instance_owners[instance_2_path]) == 1 + assert list(client._instance_owners[instance_2_path])[0] is table + success = await client._remove_instance_registration("instance-1", table) assert success assert len(client._active_instances) == 1 + assert len(client._instance_owners[instance_1_path]) == 0 + assert len(client._instance_owners[instance_2_path]) == 1 assert client._active_instances == {"projects/project-id/instances/instance-2"} - success = await client._remove_instance_registration("nonexistant") + success = await client._remove_instance_registration("nonexistant", table) assert not success assert len(client._active_instances) == 1 await client.close() +@pytest.mark.asyncio +async def test__multiple_table_registration(): + async with _make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: + instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") + assert len(client._instance_owners[instance_1_path]) == 1 + assert len(client._active_instances) == 1 + assert table_1 in client._instance_owners[instance_1_path] + async with client.get_table("instance_1", "table_2") as table_2: + assert len(client._instance_owners[instance_1_path]) == 2 + assert len(client._active_instances) == 1 + assert table_1 in client._instance_owners[instance_1_path] + assert table_2 in client._instance_owners[instance_1_path] + # table_2 should be unregistered, but instance should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert table_2 not in client._instance_owners[instance_1_path] + # both tables are gone. 
instance should be unregistered + assert len(client._active_instances) == 0 + assert instance_1_path not in client._active_instances + assert len(client._instance_owners[instance_1_path]) == 0 + +async def test__multiple_instance_registration(): + async with _make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: + async with client.get_table("instance_2", "table_2") as table_2: + instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") + instance_2_path = client._gapic_client.instance_path(client.project, "instance_2") + assert len(client._instance_owners[instance_1_path]) == 1 + assert len(client._instance_owners[instance_2_path]) == 1 + assert len(client._active_instances) == 2 + assert table_1 in client._instance_owners[instance_1_path] + assert table_2 in client._instance_owners[instance_2_path] + # instance2 should be unregistered, but instance1 should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert len(client._instance_owners[instance_2_path]) == 0 + assert len(client._instance_owners[instance_1_path]) == 1 + assert table_1 in client._instance_owners[instance_1_path] + # both tables are gone. instances should both be unregistered + assert len(client._active_instances) == 0 + assert len(client._instance_owners[instance_1_path]) == 0 + assert len(client._instance_owners[instance_2_path]) == 0 + @pytest.mark.asyncio async def test_get_table(): @@ -598,6 +652,31 @@ async def test_get_table(): assert full_instance_name in client._active_instances await client.close() +@pytest.mark.asyncio +async def test_get_table_context_manager(): + from google.cloud.bigtable.client import Table + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + + with mock.patch.object(Table, "close") as close_mock: + async with _make_one(project="project-id") as client: + async with client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) as table: + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert table.instance == expected_instance_id + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + full_instance_name = client._gapic_client.instance_path( + client.project, expected_instance_id + ) + assert full_instance_name in client._active_instances + assert close_mock.call_count == 1 @pytest.mark.asyncio async def test_multiple_pool_sizes(): From 59955beaebd5f0d194385ddabfaba3d418092606 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 16:33:45 -0700 Subject: [PATCH 293/349] added pool_size and channels as public properties --- gapic-generator-fork | 2 +- google/cloud/bigtable/client.py | 6 ++-- .../transports/pooled_grpc_asyncio.py | 32 +++++++++++++------ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 14e00e28d..9aea0a6f2 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 14e00e28d4fa7c50cbfbffe754e31d060285ee7b +Subproject commit 9aea0a6f20297b34a20f1e9ac6208a4982b644fb diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d62319d38..2729961db 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -137,7 +137,7 @@ def start_background_channel_refresh(self) -> None: if not 
self._channel_refresh_tasks: # raise RuntimeError if there is no event loop asyncio.get_running_loop() - for channel_idx in range(len(self.transport._grpc_channel._pool)): + for channel_idx in range(self.transport.pool_size): refresh_task = asyncio.create_task(self._manage_channel(channel_idx)) if sys.version_info >= (3, 8): # task names supported in Python 3.8+ @@ -208,7 +208,7 @@ async def _manage_channel( next_sleep = max(first_refresh - time.time(), 0) if next_sleep > 0: # warm the current channel immediately - channel = self.transport._grpc_channel._pool[channel_idx] + channel = self.transport.channels[channel_idx] await self._ping_and_warm_instances(channel) # continuously refresh the channel every `refresh_interval` seconds while True: @@ -246,7 +246,7 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: if self._channel_refresh_tasks: # refresh tasks already running # call ping and warm on all existing channels - for channel in self.transport._grpc_channel._pool: + for channel in self.transport.channels: await self._ping_and_warm_instances(channel) else: # refresh tasks aren't active. start them as background tasks diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 0b937c566..63efb1348 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -88,19 +88,23 @@ def __init__( credentials_file: Optional[str] = None, scopes: Optional[Sequence[str]] = None, quota_project_id: Optional[str] = None, + insecure: bool = False, **kwargs, ): self._pool: List[aio.Channel] = [] self._next_idx = 0 - self._create_channel = partial( - grpc_helpers_async.create_channel, - target=host, - credentials=credentials, - credentials_file=credentials_file, - scopes=scopes, - quota_project_id=quota_project_id, - **kwargs, - ) + if insecure: + self._create_channel = partial(aio.insecure_channel, host) + else: + self._create_channel = partial( + grpc_helpers_async.create_channel, + target=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) for i in range(pool_size): self._pool.append(self._create_channel()) @@ -379,6 +383,16 @@ def __init__( # Wrap messages. 
This must be done after self._grpc_channel exists self._prep_wrapped_messages(client_info) + @propery + def pool_size(self) -> int: + """The number of grpc channels in the pool.""" + return len(self._grpc_channel._pool) + + @property + def channels(self) -> List[grpc.Channel]: + """Acccess the internal list of grpc channels.""" + return self._grpc_channel._pool + async def replace_channel( self, channel_idx, grace=None, swap_sleep=1, new_channel=None ) -> aio.Channel: From 377a8c96e0ca434a0a0744777b733098fd2cad2c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 16:56:25 -0700 Subject: [PATCH 294/349] fixed typo --- .../services/bigtable/transports/pooled_grpc_asyncio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index 63efb1348..fdf3dd8a0 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -383,7 +383,7 @@ def __init__( # Wrap messages. This must be done after self._grpc_channel exists self._prep_wrapped_messages(client_info) - @propery + @property def pool_size(self) -> int: """The number of grpc channels in the pool.""" return len(self._grpc_channel._pool) From 8a29898d7aaf8a1864544f0f1f35bc2380e8c4c0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 17:04:42 -0700 Subject: [PATCH 295/349] simplified pooled multicallable --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 52 ++++++------------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 9aea0a6f2..cd4627b0d 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 9aea0a6f20297b34a20f1e9ac6208a4982b644fb +Subproject commit cd4627b0de8a2273fab1081a0bedde20077ba60c diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index fdf3dd8a0..c9ab6c438 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -43,40 +43,22 @@ class PooledMultiCallable: - def __init__(self, channel_pool: "PooledChannel", *args, **kwargs): + def __init__( + self, channel_pool: "PooledChannel", call_fn_name: str, *args, **kwargs + ): self._init_args = args self._init_kwargs = kwargs self.next_channel_fn = channel_pool.next_channel - - -class PooledUnaryUnaryMultiCallable(PooledMultiCallable, aio.UnaryUnaryMultiCallable): - def __call__(self, *args, **kwargs) -> aio.UnaryUnaryCall: - return self.next_channel_fn().unary_unary( - *self._init_args, **self._init_kwargs - )(*args, **kwargs) - - -class PooledUnaryStreamMultiCallable(PooledMultiCallable, aio.UnaryStreamMultiCallable): - def __call__(self, *args, **kwargs) -> aio.UnaryStreamCall: - return self.next_channel_fn().unary_stream( - *self._init_args, **self._init_kwargs - )(*args, **kwargs) - - -class PooledStreamUnaryMultiCallable(PooledMultiCallable, aio.StreamUnaryMultiCallable): - def __call__(self, *args, **kwargs) -> aio.StreamUnaryCall: - return self.next_channel_fn().stream_unary( - *self._init_args, **self._init_kwargs - )(*args, **kwargs) - - -class PooledStreamStreamMultiCallable( - PooledMultiCallable, 
aio.StreamStreamMultiCallable -): - def __call__(self, *args, **kwargs) -> aio.StreamStreamCall: - return self.next_channel_fn().stream_stream( - *self._init_args, **self._init_kwargs - )(*args, **kwargs) + self.call_fn_name = call_fn_name + self._stubs: dict[aio.Channel, Callable] = {} + + def __call__(self, *args, **kwargs) -> aio.Call: + channel = self.next_channel_fn() + if channel not in self._stubs: + self._stubs[channel] = getattr(channel, self.call_fn_name)( + *self._init_args, **self._init_kwargs + ) + return self._stubs[channel](*args, **kwargs) class PooledChannel(aio.Channel): @@ -114,16 +96,16 @@ def next_channel(self) -> aio.Channel: return channel def unary_unary(self, *args, **kwargs) -> grpc.aio.UnaryUnaryMultiCallable: - return PooledUnaryUnaryMultiCallable(self, *args, **kwargs) + return PooledMultiCallable(self, "unary_unary", *args, **kwargs) def unary_stream(self, *args, **kwargs) -> grpc.aio.UnaryStreamMultiCallable: - return PooledUnaryStreamMultiCallable(self, *args, **kwargs) + return PooledMultiCallable(self, "unary_stream", *args, **kwargs) def stream_unary(self, *args, **kwargs) -> grpc.aio.StreamUnaryMultiCallable: - return PooledStreamUnaryMultiCallable(self, *args, **kwargs) + return PooledMultiCallable(self, "stream_unary", *args, **kwargs) def stream_stream(self, *args, **kwargs) -> grpc.aio.StreamStreamMultiCallable: - return PooledStreamStreamMultiCallable(self, *args, **kwargs) + return PooledMultiCallable(self, "stream_stream", *args, **kwargs) async def close(self, grace=None): close_fns = [channel.close(grace=grace) for channel in self._pool] From 50aa5baea40d84daab2a4c3e929ef28277b91203 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 17:07:21 -0700 Subject: [PATCH 296/349] ran blacken --- google/cloud/bigtable/client.py | 4 +++- tests/unit/test_client.py | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2729961db..cfe1f678f 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -252,7 +252,9 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: # refresh tasks aren't active. 
start them as background tasks self.start_background_channel_refresh() - async def _remove_instance_registration(self, instance_id: str, owner:Table) -> bool: + async def _remove_instance_registration( + self, instance_id: str, owner: Table + ) -> bool: """ Removes an instance from the client's registered instances, to prevent warming new channels for the instance diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2b0ec3f68..09b2a0852 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -581,11 +581,14 @@ async def test__remove_instance_registration(): assert len(client._active_instances) == 1 await client.close() + @pytest.mark.asyncio async def test__multiple_table_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: - instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._active_instances) == 1 assert table_1 in client._instance_owners[instance_1_path] @@ -603,12 +606,17 @@ async def test__multiple_table_registration(): assert instance_1_path not in client._active_instances assert len(client._instance_owners[instance_1_path]) == 0 + async def test__multiple_instance_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: async with client.get_table("instance_2", "table_2") as table_2: - instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") - instance_2_path = client._gapic_client.instance_path(client.project, "instance_2") + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance_2" + ) assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._instance_owners[instance_2_path]) == 1 assert len(client._active_instances) == 2 @@ -652,9 +660,11 @@ async def test_get_table(): assert full_instance_name in client._active_instances await client.close() + @pytest.mark.asyncio async def test_get_table_context_manager(): from google.cloud.bigtable.client import Table + expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" @@ -678,6 +688,7 @@ async def test_get_table_context_manager(): assert full_instance_name in client._active_instances assert close_mock.call_count == 1 + @pytest.mark.asyncio async def test_multiple_pool_sizes(): # should be able to create multiple clients with different pool sizes without issue From 42a52a3e1430581bf982d40bf4689da7a161d969 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 19 Apr 2023 18:01:13 -0700 Subject: [PATCH 297/349] associate ids with instances, instead of Table objects --- google/cloud/bigtable/client.py | 8 +++++--- tests/unit/test_client.py | 31 ++++++++++++------------------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index cfe1f678f..dacf864bf 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -114,7 +114,9 @@ def __init__( ) # keep track of active instances to for warmup on channel refresh self._active_instances: Set[str] = set() - self._instance_owners: dict[str, Set[Table]] = {} + # keep track of table objects 
associated with each instance + # only remove instance from _active_instances when all associated tables remove it + self._instance_owners: dict[str, Set[int]] = {} # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] @@ -240,7 +242,7 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: owners call _remove_instance_registration """ instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._instance_owners.setdefault(instance_name, set()).add(owner) + self._instance_owners.setdefault(instance_name, set()).add(id(owner)) if instance_name not in self._active_instances: self._active_instances.add(instance_name) if self._channel_refresh_tasks: @@ -272,7 +274,7 @@ async def _remove_instance_registration( instance_name = self._gapic_client.instance_path(self.project, instance_id) owner_list = self._instance_owners.get(instance_name, set()) try: - owner_list.remove(owner) + owner_list.remove(id(owner)) if len(owner_list) == 0: self._active_instances.remove(instance_name) return True diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 09b2a0852..f82750764 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -567,9 +567,9 @@ async def test__remove_instance_registration(): instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") assert len(client._instance_owners[instance_1_path]) == 1 - assert list(client._instance_owners[instance_1_path])[0] is table + assert list(client._instance_owners[instance_1_path])[0] == id(table) assert len(client._instance_owners[instance_2_path]) == 1 - assert list(client._instance_owners[instance_2_path])[0] is table + assert list(client._instance_owners[instance_2_path])[0] == id(table) success = await client._remove_instance_registration("instance-1", table) assert success assert len(client._active_instances) == 1 @@ -586,21 +586,19 @@ async def test__remove_instance_registration(): async def test__multiple_table_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: - instance_1_path = client._gapic_client.instance_path( - client.project, "instance_1" - ) + instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._active_instances) == 1 - assert table_1 in client._instance_owners[instance_1_path] + assert id(table_1) in client._instance_owners[instance_1_path] async with client.get_table("instance_1", "table_2") as table_2: assert len(client._instance_owners[instance_1_path]) == 2 assert len(client._active_instances) == 1 - assert table_1 in client._instance_owners[instance_1_path] - assert table_2 in client._instance_owners[instance_1_path] + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_1_path] # table_2 should be unregistered, but instance should still be active assert len(client._active_instances) == 1 assert instance_1_path in client._active_instances - assert table_2 not in client._instance_owners[instance_1_path] + assert id(table_2) not in client._instance_owners[instance_1_path] # both tables are gone. 
instance should be unregistered assert len(client._active_instances) == 0 assert instance_1_path not in client._active_instances @@ -611,23 +609,19 @@ async def test__multiple_instance_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: async with client.get_table("instance_2", "table_2") as table_2: - instance_1_path = client._gapic_client.instance_path( - client.project, "instance_1" - ) - instance_2_path = client._gapic_client.instance_path( - client.project, "instance_2" - ) + instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") + instance_2_path = client._gapic_client.instance_path(client.project, "instance_2") assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._instance_owners[instance_2_path]) == 1 assert len(client._active_instances) == 2 - assert table_1 in client._instance_owners[instance_1_path] - assert table_2 in client._instance_owners[instance_2_path] + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_2_path] # instance2 should be unregistered, but instance1 should still be active assert len(client._active_instances) == 1 assert instance_1_path in client._active_instances assert len(client._instance_owners[instance_2_path]) == 0 assert len(client._instance_owners[instance_1_path]) == 1 - assert table_1 in client._instance_owners[instance_1_path] + assert id(table_1) in client._instance_owners[instance_1_path] # both tables are gone. instances should both be unregistered assert len(client._active_instances) == 0 assert len(client._instance_owners[instance_1_path]) == 0 @@ -664,7 +658,6 @@ async def test_get_table(): @pytest.mark.asyncio async def test_get_table_context_manager(): from google.cloud.bigtable.client import Table - expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" From abc7a2dfc363655a0a835d39f7914ad4ea81c1de Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 09:50:48 -0700 Subject: [PATCH 298/349] fixed tests --- google/cloud/bigtable/_read_rows.py | 2 +- tests/unit/test_client_read_rows.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 4b4c7fba9..b892f432e 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -177,7 +177,7 @@ async def _read_rows_retryable_attempt( buffer_size: int, per_row_timeout: float | None, per_request_timeout: float | None, - row_limit: int, + total_row_limit: int, ) -> AsyncGenerator[Row | RequestStats, None]: """ Retryable wrapper for merge_rows. 
This function is called each time diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index e341adf37..f30b87741 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -205,10 +205,10 @@ async def test_read_rows_timeout(operation_timeout): "per_row_t, operation_t, expected_num", [ (0.1, 0.01, 0), - (0.01, 0.015, 1), + (0.1, 0.19, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), - (0.05, 0.21, 4), + (0.05, 0.24, 4), ], ) @pytest.mark.asyncio @@ -249,7 +249,7 @@ async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): @pytest.mark.parametrize( "per_request_t, operation_t, expected_num", [ - (0.01, 0.015, 1), + (0.05, 0.09, 1), (0.05, 0.54, 10), (0.05, 0.14, 2), (0.05, 0.24, 4), From 836af0ff46c019eb9a9040813e2d756e4c218979 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 12:15:51 -0700 Subject: [PATCH 299/349] made sure that empty strings are valid family and qualifier inputs --- google/cloud/bigtable/_read_rows.py | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index b892f432e..4b7462a7f 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -428,7 +428,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: if chunk.commit_row: # check if row is complete, and return it if so if not isinstance(self.current_state, AWAITING_NEW_CELL): - raise InvalidChunk("commit row attempted without finishing cell") + raise InvalidChunk("Commit chunk received in invalid state") complete_row = self.adapter.finish_row() self._handle_complete_row(complete_row) return complete_row @@ -457,10 +457,10 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk received when not processing row") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") - if chunk.family_name and chunk.family_name.value: - raise InvalidChunk("Reset chunk has family_name") - if chunk.qualifier and chunk.qualifier.value: - raise InvalidChunk("Reset chunk has qualifier") + if _chunk_has_field(chunk, "family_name"): + raise InvalidChunk("Reset chunk has a family name") + if _chunk_has_field(chunk, "qualifier"): + raise InvalidChunk("Reset chunk has a qualifier") if chunk.timestamp_micros: raise InvalidChunk("Reset chunk has a timestamp") if chunk.labels: @@ -469,7 +469,6 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk has a value") self._reset_row() - class _State(ABC): """ Represents a state the state machine can be in @@ -504,7 +503,6 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # force the chunk processing in the AWAITING_CELL_VALUE. return AWAITING_NEW_CELL(self._owner).handle_chunk(chunk) - class AWAITING_NEW_CELL(_State): """ Represents a cell boundary witin a row @@ -516,13 +514,12 @@ class AWAITING_NEW_CELL(_State): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": is_split = chunk.value_size > 0 - # expected_cell_size = chunk.value_size if is_split else len(chunk.value) # track latest cell data. 
New chunks won't send repeated data - if chunk.family_name.value: + if _chunk_has_field(chunk, "family_name"): self._owner.current_family = chunk.family_name.value - if not chunk.qualifier.value: + if not _chunk_has_field(chunk, "qualifier"): raise InvalidChunk("New column family must specify qualifier") - if chunk.qualifier.value: + if _chunk_has_field(chunk, "qualifier"): self._owner.current_qualifier = chunk.qualifier.value if self._owner.current_family is None: raise InvalidChunk("Family not found") @@ -532,7 +529,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: raise InvalidChunk("Row key changed mid row") - if not self._owner.current_family: + if self._owner.current_family is None: raise InvalidChunk("Missing family for a new cell") if self._owner.current_qualifier is None: raise InvalidChunk("Missing qualifier for a new cell") @@ -566,9 +563,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("Found row key mid cell") - if chunk.family_name.value: + if chunk_has_field_set(chunk, "family_name"): raise InvalidChunk("In progress cell had a family name") - if chunk.qualifier.value: + if _chunk_has_field_set(chunk, "qualifier"): raise InvalidChunk("In progress cell had a qualifier") if chunk.timestamp_micros: raise InvalidChunk("In progress cell had a timestamp") @@ -652,3 +649,14 @@ def finish_row(self) -> Row: new_row = Row(self.current_key, self.completed_cells) self.reset() return new_row + +def _chunk_has_field(chunk: ReadRowsResponse.CellChunk, field: str) -> bool: + """ + Returns true if the field is set on the chunk + + Required to disambiguate between empty strings and unset values + """ + try: + return chunk.HasField(field) + except ValueError: + return False From e73551de3d1ea2a4f27a6ddb003308dd04f2178f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 12:17:37 -0700 Subject: [PATCH 300/349] added tests for state machine --- google/cloud/bigtable/_read_rows.py | 4 +- tests/unit/test__read_rows.py | 198 +++++++++++++++++++++++++++- 2 files changed, 196 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 4b7462a7f..c0ef19133 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -563,9 +563,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("Found row key mid cell") - if chunk_has_field_set(chunk, "family_name"): + if _chunk_has_field(chunk, "family_name"): raise InvalidChunk("In progress cell had a family name") - if _chunk_has_field_set(chunk, "qualifier"): + if _chunk_has_field(chunk, "qualifier"): raise InvalidChunk("In progress cell had a qualifier") if chunk.timestamp_micros: raise InvalidChunk("In progress cell had a timestamp") diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 51575e55e..df9f0ae31 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -3,6 +3,9 @@ import pytest from google.cloud.bigtable.exceptions import InvalidChunk +from google.cloud.bigtable._read_rows import AWAITING_NEW_ROW +from google.cloud.bigtable._read_rows import AWAITING_NEW_CELL +from google.cloud.bigtable._read_rows import AWAITING_CELL_VALUE TEST_FAMILY = "family_name" TEST_QUALIFIER = b"column_qualifier" 
@@ -313,6 +316,7 @@ async def test_aclose(self): class TestStateMachine(unittest.TestCase): + @staticmethod def _get_target_class(): from google.cloud.bigtable._read_rows import _StateMachine @@ -323,10 +327,196 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor(self): - # ensure that the _StateMachine constructor - # sets the initial state - pass - + from google.cloud.bigtable._read_rows import _RowBuilder + instance = self._make_one() + assert instance.last_seen_row_key is None + assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_family is None + assert instance.current_qualifier is None + assert isinstance(instance.adapter, _RowBuilder) + assert instance.adapter.current_key is None + assert instance.adapter.working_cell is None + assert instance.adapter.working_value is None + assert instance.adapter.completed_cells == [] + + def test_is_terminal_state(self): + + instance = self._make_one() + assert instance.is_terminal_state() is True + instance.current_state = AWAITING_NEW_ROW(None) + assert instance.is_terminal_state() is True + instance.current_state = AWAITING_NEW_CELL(None) + assert instance.is_terminal_state() is False + instance.current_state = AWAITING_CELL_VALUE(None) + assert instance.is_terminal_state() is False + + def test__reset_row(self): + instance = self._make_one() + instance.current_state = mock.Mock() + instance.current_family = "family" + instance.current_qualifier = "qualifier" + instance.adapter = mock.Mock() + instance._reset_row() + assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test_handle_last_scanned_row_wrong_state(self): + from google.cloud.bigtable.exceptions import InvalidChunk + instance = self._make_one() + instance.current_state = AWAITING_NEW_CELL(None) + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row('row_key') + assert e.value.args[0] == "Last scanned row key received in invalid state" + instance.current_state = AWAITING_CELL_VALUE(None) + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row('row_key') + assert e.value.args[0] == "Last scanned row key received in invalid state" + + def test_handle_last_scanned_row_out_of_order(self): + from google.cloud.bigtable.exceptions import InvalidChunk + instance = self._make_one() + instance.last_seen_row_key = b"b" + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row(b"a") + assert e.value.args[0] == "Last scanned row key is out of order" + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row(b"b") + assert e.value.args[0] == "Last scanned row key is out of order" + + def test_handle_last_scanned_row(self): + from google.cloud.bigtable.row import _LastScannedRow + instance = self._make_one() + instance.adapter = mock.Mock() + instance.last_seen_row_key = b"a" + output_row = instance.handle_last_scanned_row(b"b") + assert instance.last_seen_row_key == b"b" + assert isinstance(output_row, _LastScannedRow) + assert output_row.row_key == b"b" + assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test__handle_complete_row(self): + from google.cloud.bigtable.row import Row + instance = self._make_one() + instance.current_state = 
mock.Mock() + instance.current_family = "family" + instance.current_qualifier = "qualifier" + instance.adapter = mock.Mock() + instance._handle_complete_row(Row(b"row_key", {})) + assert instance.last_seen_row_key == b"row_key" + assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test__handle_reset_chunk_errors(self): + from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(mock.Mock()) + instance.current_state = mock.Mock() + assert e.value.args[0] == "Reset chunk received when not processing row" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(row_key=b"row_key")._pb) + assert e.value.args[0] == "Reset chunk has a row key" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(family_name="family")._pb) + assert e.value.args[0] == "Reset chunk has a family name" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(qualifier=b"qualifier")._pb) + assert e.value.args[0] == "Reset chunk has a qualifier" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(timestamp_micros=1)._pb) + assert e.value.args[0] == "Reset chunk has a timestamp" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(value=b"value")._pb) + assert e.value.args[0] == "Reset chunk has a value" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(labels=["label"])._pb) + assert e.value.args[0] == "Reset chunk has labels" + + def test_handle_chunk_out_of_order(self): + from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() + instance.last_seen_row_key = b"b" + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"a")._pb + instance.handle_chunk(chunk) + assert "increasing" in e.value.args[0] + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"b")._pb + instance.handle_chunk(chunk) + assert "increasing" in e.value.args[0] + + + def test_handle_chunk_reset(self): + """Should call _handle_reset_chunk when a chunk with reset_row is encountered""" + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() + with mock.patch.object(type(instance), "_handle_reset_chunk") as mock_reset: + chunk = ReadRowsResponse.CellChunk(reset_row=True)._pb + output = instance.handle_chunk(chunk) + assert output is None + assert mock_reset.call_count == 1 + + @pytest.mark.parametrize("state", [AWAITING_NEW_ROW, AWAITING_CELL_VALUE]) + def handle_chunk_with_commit_wrong_state(self, state): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() + with mock.patch.object(type(instance.current_state), "handle_chunk") as mock_state_handle: + mock_state_handle.return_value = state(mock.Mock()) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(commit_row=True)._pb + instance.handle_chunk(chunk) + assert isinstance(instance.current_state, state) + assert e.value.args[0] == "Commit 
chunk received with in invalid state" + + def test_handle_chunk_with_commit(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.row import Row + instance = self._make_one() + with mock.patch.object(type(instance), "_reset_row") as mock_reset: + chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=True)._pb + output = instance.handle_chunk(chunk) + assert isinstance(output, Row) + assert output.row_key == b"row_key" + assert output[0].family == "f" + assert output[0].column_qualifier == b"q" + assert instance.last_seen_row_key == b"row_key" + assert mock_reset.call_count == 1 + + + def test_handle_chunk_with_commit_empty_strings(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.row import Row + instance = self._make_one() + with mock.patch.object(type(instance), "_reset_row") as mock_reset: + chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="", qualifier=b"", commit_row=True)._pb + output = instance.handle_chunk(chunk) + assert isinstance(output, Row) + assert output.row_key == b"row_key" + assert output[0].family == "" + assert output[0].column_qualifier == b"" + assert instance.last_seen_row_key == b"row_key" + assert mock_reset.call_count == 1 + + + def handle_chunk_incomplete(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.row import Row + instance = self._make_one() + chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=False)._pb + output = instance.handle_chunk(chunk) + assert output is None + assert isinstance(instance.current_state, AWAITING_CELL_VALUE) + assert instance.current_family == "f" + assert instance.current_qualifier == b"q" class TestState(unittest.TestCase): pass From 792aba188ae3e659096dacc54cddb07d84b90463 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 13:31:21 -0700 Subject: [PATCH 301/349] added state machine tests --- google/cloud/bigtable/_read_rows.py | 6 +- tests/unit/test__read_rows.py | 220 +++++++++++++++++++++++++++- 2 files changed, 222 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index c0ef19133..b63d5602f 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -518,7 +518,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": if _chunk_has_field(chunk, "family_name"): self._owner.current_family = chunk.family_name.value if not _chunk_has_field(chunk, "qualifier"): - raise InvalidChunk("New column family must specify qualifier") + raise InvalidChunk("New family must specify qualifier") if _chunk_has_field(chunk, "qualifier"): self._owner.current_qualifier = chunk.qualifier.value if self._owner.current_family is None: @@ -530,9 +530,9 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": raise InvalidChunk("Row key changed mid row") if self._owner.current_family is None: - raise InvalidChunk("Missing family for a new cell") + raise InvalidChunk("Missing family for new cell") if self._owner.current_qualifier is None: - raise InvalidChunk("Missing qualifier for a new cell") + raise InvalidChunk("Missing qualifier for new cell") self._owner.adapter.start_cell( family=self._owner.current_family, diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index df9f0ae31..f8c405acf 100644 --- 
a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -519,7 +519,209 @@ def handle_chunk_incomplete(self): assert instance.current_qualifier == b"q" class TestState(unittest.TestCase): - pass + + def test_AWAITING_NEW_ROW_empty_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = AWAITING_NEW_ROW(mock.Mock()) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"")._pb + instance.handle_chunk(chunk) + assert "missing a row key" in e.value.args[0] + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(chunk) + assert "missing a row key" in e.value.args[0] + + def test_AWAITING_NEW_ROW(self): + """ + AWAITING_NEW_ROW should start a RowBuilder row, then + delegate the call to AWAITING_NEW_CELL + """ + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = AWAITING_NEW_ROW(mock.Mock()) + with mock.patch.object(AWAITING_NEW_CELL, "handle_chunk") as mock_delegate: + chunk = ReadRowsResponse.CellChunk(row_key=b"row_key")._pb + output = instance.handle_chunk(chunk) + assert instance._owner.adapter.start_row.call_count == 1 + assert instance._owner.adapter.start_row.call_args[0][0] == b"row_key" + mock_delegate.assert_called_once_with(chunk) + + def test_AWAITING_NEW_CELL_family_without_qualifier(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + state_machine.current_qualifier = b"q" + instance = AWAITING_NEW_CELL(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(family_name="fam")._pb + instance.handle_chunk(chunk) + assert "New family must specify qualifier" in e.value.args[0] + + def test_AWAITING_NEW_CELL_qualifier_without_family(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(qualifier=b"q")._pb + instance.handle_chunk(chunk) + assert "Family not found" in e.value.args[0] + + def test_AWAITING_NEW_CELL_no_row_state(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(chunk) + assert "Missing family for new cell" in e.value.args[0] + state_machine.current_family = "fam" + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(chunk) + assert "Missing qualifier for new cell" in e.value.args[0] + + def test_AWAITING_NEW_CELL_invalid_row_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL(state_machine) + state_machine.adapter.current_key = b"abc" + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb + instance.handle_chunk(chunk) + assert "Row key changed mid row" in e.value.args[0] + + def test_AWAITING_NEW_CELL_success_no_split(self): + from google.cloud.bigtable_v2.types.bigtable import 
ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_NEW_CELL(state_machine) + row_key = b"row_key" + family = "fam" + qualifier = b"q" + labels = ["label"] + timestamp = 123 + value = b"value" + chunk = ReadRowsResponse.CellChunk(row_key=row_key, family_name=family, qualifier=qualifier, timestamp_micros=timestamp, value=value, labels=labels)._pb + state_machine.adapter.current_key = row_key + new_state = instance.handle_chunk(chunk) + assert state_machine.adapter.start_cell.call_count == 1 + kwargs = state_machine.adapter.start_cell.call_args[1] + assert kwargs["family"] == family + assert kwargs["qualifier"] == qualifier + assert kwargs["timestamp_micros"] == timestamp + assert kwargs["labels"] == labels + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 1 + assert isinstance(new_state, AWAITING_NEW_CELL) + + def test_AWAITING_NEW_CELL_success_with_split(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_NEW_CELL(state_machine) + row_key = b"row_key" + family = "fam" + qualifier = b"q" + labels = ["label"] + timestamp = 123 + value = b"value" + chunk = ReadRowsResponse.CellChunk(value_size=1, row_key=row_key, family_name=family, qualifier=qualifier, timestamp_micros=timestamp, value=value, labels=labels)._pb + state_machine.adapter.current_key = row_key + new_state = instance.handle_chunk(chunk) + assert state_machine.adapter.start_cell.call_count == 1 + kwargs = state_machine.adapter.start_cell.call_args[1] + assert kwargs["family"] == family + assert kwargs["qualifier"] == qualifier + assert kwargs["timestamp_micros"] == timestamp + assert kwargs["labels"] == labels + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 0 + assert isinstance(new_state, AWAITING_CELL_VALUE) + + def test_AWAITING_CELL_VALUE_w_row_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb + instance.handle_chunk(chunk) + assert "In progress cell had a row key" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_row_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(family_name="")._pb + instance.handle_chunk(chunk) + assert "In progress cell had a family name" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_qualifier(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(qualifier=b"")._pb + 
instance.handle_chunk(chunk) + assert "In progress cell had a qualifier" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_timestamp(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(timestamp_micros=123)._pb + instance.handle_chunk(chunk) + assert "In progress cell had a timestamp" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_labels(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE(state_machine) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(labels=[""])._pb + instance.handle_chunk(chunk) + assert "In progress cell had labels" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_continuation(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_CELL_VALUE(state_machine) + value = b"value" + chunk = ReadRowsResponse.CellChunk(value=value, value_size=1)._pb + new_state = instance.handle_chunk(chunk) + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 0 + assert isinstance(new_state, AWAITING_CELL_VALUE) + + def test_AWAITING_CELL_VALUE_final_chunk(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_CELL_VALUE(state_machine) + value = b"value" + chunk = ReadRowsResponse.CellChunk(value=value, value_size=0)._pb + new_state = instance.handle_chunk(chunk) + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 1 + assert isinstance(new_state, AWAITING_NEW_CELL) class TestRowBuilder(unittest.TestCase): @@ -672,3 +874,19 @@ def test_reset(self): self.assertEqual(row_builder.working_cell, None) self.assertEqual(row_builder.working_value, None) self.assertEqual(len(row_builder.completed_cells), 0) + +class TestChunkHasField(): + + def test__chunk_has_field_empty(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _chunk_has_field + chunk = ReadRowsResponse.CellChunk()._pb + assert not _chunk_has_field(chunk, "family_name") + assert not _chunk_has_field(chunk, "qualifier") + + def test__chunk_has_field_populated_empty_strings(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable._read_rows import _chunk_has_field + chunk = ReadRowsResponse.CellChunk(qualifier=b'', family_name="")._pb + assert _chunk_has_field(chunk, "family_name") + assert _chunk_has_field(chunk, "qualifier") From e57c51048b6ce74f063bf270d11bb10176d2ee4b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 14:22:34 -0700 Subject: [PATCH 302/349] fixed broken mock --- tests/unit/test_client.py | 48 +++++++++++++++++++-------------------- 1 file 
changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 94ec5cfa8..10bcfba97 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -400,30 +400,30 @@ async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sle import random channel_idx = 1 - random.uniform = mock.Mock() - random.uniform.side_effect = lambda min_, max_: min_ - with mock.patch.object(time, "time") as time: - time.return_value = 0 - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError - ] - try: - client = _make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel( - channel_idx, refresh_interval, refresh_interval - ) - else: - await client._manage_channel(channel_idx) - except asyncio.CancelledError: - pass - assert sleep.call_count == num_cycles - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - assert ( - abs(total_sleep - expected_sleep) < 0.1 - ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - await client.close() + with mock.patch.object(random, "uniform") as uniform: + uniform.side_effect = lambda min_, max_: min_ + with mock.patch.object(time, "time") as time: + time.return_value = 0 + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] + try: + client = _make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel( + channel_idx, refresh_interval, refresh_interval + ) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + await client.close() @pytest.mark.asyncio From 88748a9ac77da425c785e816003a75569182749e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 15:24:12 -0700 Subject: [PATCH 303/349] added additional tests --- google/cloud/bigtable/_read_rows.py | 6 ++- tests/unit/test__read_rows.py | 71 ++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index b63d5602f..39518819a 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -152,6 +152,7 @@ async def _generator_to_buffer( try: async for item in input_generator: await buffer.put(item) + await asyncio.sleep(0) await buffer.put(StopAsyncIteration) except Exception as e: await buffer.put(e) @@ -170,6 +171,7 @@ async def _buffer_to_generator( if isinstance(item, Exception): raise item yield item + await asyncio.sleep(0) async def _read_rows_retryable_attempt( self, @@ -484,7 +486,7 @@ def __init__(self, owner: _StateMachine): @abstractmethod def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": - pass + raise NotImplementedError class AWAITING_NEW_ROW(_State): @@ -562,7 +564,7 @@ class AWAITING_CELL_VALUE(_State): def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # ensure reset chunk matches expectations if chunk.row_key: - raise InvalidChunk("Found row key mid cell") + raise InvalidChunk("In progress cell had a row key") if 
_chunk_has_field(chunk, "family_name"): raise InvalidChunk("In progress cell had a family name") if _chunk_has_field(chunk, "qualifier"): diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index f8c405acf..7ae64510c 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -78,6 +78,13 @@ def test_ctor(self): assert retryable_fn.args[4] == row_limit assert client.read_rows.call_count == 0 + def test___aiter__(self): + request = {} + client = mock.Mock() + client.read_rows = mock.Mock() + instance = self._make_one(request, client) + assert instance.__aiter__() is instance + @pytest.mark.asyncio async def test_transient_error_capture(self): from google.api_core import exceptions as core_exceptions @@ -314,6 +321,68 @@ async def test_aclose(self): # try calling a second time await instance.aclose() + @pytest.mark.parametrize("limit", [1, 3, 10]) + @pytest.mark.asyncio + async def test_retryable_attempt_hit_limit(self, limit): + """ + Stream should end after hitting the limit + """ + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one({}, mock.Mock()) + async def mock_gapic(*args, **kwargs): + # continuously return a single row + async def gen(): + for i in range(limit*2): + chunk = ReadRowsResponse.CellChunk(row_key=str(i).encode(), family_name="family_name", qualifier=b"qualifier", commit_row=True) + yield ReadRowsResponse(chunks=[chunk]) + return gen() + gen = instance._read_rows_retryable_attempt(mock_gapic, 0, None, None, limit) + # should yield values up to the limit + for i in range(limit): + await gen.__anext__() + # next value should be StopAsyncIteration + with pytest.raises(StopAsyncIteration): + await gen.__anext__() + + @pytest.mark.asyncio + async def test_retryable_ignore_repeated_rows(self): + """ + Duplicate rows emitted by stream should be ignored by _read_rows_retryable_attempt + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.row import Row + async def mock_stream(): + while True: + yield Row(b"dup_key", cells=[]) + yield Row(b"dup_key", cells=[]) + yield Row(b"new", cells=[]) + with mock.patch.object(_ReadRowsOperation, "merge_row_response_stream") as mock_stream_fn: + mock_stream_fn.return_value = mock_stream() + instance = self._make_one({}, mock.AsyncMock()) + first_row = await instance.__anext__() + assert first_row.row_key == b"dup_key" + second_row = await instance.__anext__() + assert second_row.row_key == b"new" + + @pytest.mark.asyncio + async def test_retryable_ignore_last_scanned_rows(self): + """ + Duplicate rows emitted by stream should be ignored by _read_rows_retryable_attempt + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.row import Row, _LastScannedRow + async def mock_stream(): + while True: + yield Row(b"key1", cells=[]) + yield _LastScannedRow(b"ignored") + yield Row(b"key2", cells=[]) + with mock.patch.object(_ReadRowsOperation, "merge_row_response_stream") as mock_stream_fn: + mock_stream_fn.return_value = mock_stream() + instance = self._make_one({}, mock.AsyncMock()) + first_row = await instance.__anext__() + assert first_row.row_key == b"key1" + second_row = await instance.__anext__() + assert second_row.row_key == b"key2" class TestStateMachine(unittest.TestCase): @@ -855,7 +924,7 @@ def test_finish_row(self): self.assertEqual(output[i].labels, TEST_LABELS) self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) - def 
finish_row_no_row(self): + def test_finish_row_no_row(self): with self.assertRaises(InvalidChunk) as e: self._make_one().finish_row() self.assertEqual(str(e.exception), "No row in progress") From 0c38981f6e8a156d31f59ea41ddd6bbf3779b872 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 15:26:52 -0700 Subject: [PATCH 304/349] ran blacken --- google/cloud/bigtable/_read_rows.py | 3 + tests/unit/test__read_rows.py | 129 ++++++++++++++++++++++------ 2 files changed, 105 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 39518819a..3dab23de3 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -471,6 +471,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): raise InvalidChunk("Reset chunk has a value") self._reset_row() + class _State(ABC): """ Represents a state the state machine can be in @@ -505,6 +506,7 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": # force the chunk processing in the AWAITING_CELL_VALUE. return AWAITING_NEW_CELL(self._owner).handle_chunk(chunk) + class AWAITING_NEW_CELL(_State): """ Represents a cell boundary witin a row @@ -652,6 +654,7 @@ def finish_row(self) -> Row: self.reset() return new_row + def _chunk_has_field(chunk: ReadRowsResponse.CellChunk, field: str) -> bool: """ Returns true if the field is set on the chunk diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 7ae64510c..c958d691f 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -328,14 +328,23 @@ async def test_retryable_attempt_hit_limit(self, limit): Stream should end after hitting the limit """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one({}, mock.Mock()) + async def mock_gapic(*args, **kwargs): # continuously return a single row async def gen(): - for i in range(limit*2): - chunk = ReadRowsResponse.CellChunk(row_key=str(i).encode(), family_name="family_name", qualifier=b"qualifier", commit_row=True) + for i in range(limit * 2): + chunk = ReadRowsResponse.CellChunk( + row_key=str(i).encode(), + family_name="family_name", + qualifier=b"qualifier", + commit_row=True, + ) yield ReadRowsResponse(chunks=[chunk]) + return gen() + gen = instance._read_rows_retryable_attempt(mock_gapic, 0, None, None, limit) # should yield values up to the limit for i in range(limit): @@ -351,12 +360,16 @@ async def test_retryable_ignore_repeated_rows(self): """ from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable.row import Row + async def mock_stream(): while True: yield Row(b"dup_key", cells=[]) yield Row(b"dup_key", cells=[]) yield Row(b"new", cells=[]) - with mock.patch.object(_ReadRowsOperation, "merge_row_response_stream") as mock_stream_fn: + + with mock.patch.object( + _ReadRowsOperation, "merge_row_response_stream" + ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() instance = self._make_one({}, mock.AsyncMock()) first_row = await instance.__anext__() @@ -371,12 +384,16 @@ async def test_retryable_ignore_last_scanned_rows(self): """ from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable.row import Row, _LastScannedRow + async def mock_stream(): while True: yield Row(b"key1", cells=[]) yield _LastScannedRow(b"ignored") yield Row(b"key2", cells=[]) - with mock.patch.object(_ReadRowsOperation, "merge_row_response_stream") as mock_stream_fn: + + with 
mock.patch.object( + _ReadRowsOperation, "merge_row_response_stream" + ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() instance = self._make_one({}, mock.AsyncMock()) first_row = await instance.__anext__() @@ -384,8 +401,8 @@ async def mock_stream(): second_row = await instance.__anext__() assert second_row.row_key == b"key2" -class TestStateMachine(unittest.TestCase): +class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.bigtable._read_rows import _StateMachine @@ -397,6 +414,7 @@ def _make_one(self, *args, **kwargs): def test_ctor(self): from google.cloud.bigtable._read_rows import _RowBuilder + instance = self._make_one() assert instance.last_seen_row_key is None assert isinstance(instance.current_state, AWAITING_NEW_ROW) @@ -433,18 +451,20 @@ def test__reset_row(self): def test_handle_last_scanned_row_wrong_state(self): from google.cloud.bigtable.exceptions import InvalidChunk + instance = self._make_one() instance.current_state = AWAITING_NEW_CELL(None) with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row('row_key') + instance.handle_last_scanned_row("row_key") assert e.value.args[0] == "Last scanned row key received in invalid state" instance.current_state = AWAITING_CELL_VALUE(None) with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row('row_key') + instance.handle_last_scanned_row("row_key") assert e.value.args[0] == "Last scanned row key received in invalid state" def test_handle_last_scanned_row_out_of_order(self): from google.cloud.bigtable.exceptions import InvalidChunk + instance = self._make_one() instance.last_seen_row_key = b"b" with pytest.raises(InvalidChunk) as e: @@ -456,6 +476,7 @@ def test_handle_last_scanned_row_out_of_order(self): def test_handle_last_scanned_row(self): from google.cloud.bigtable.row import _LastScannedRow + instance = self._make_one() instance.adapter = mock.Mock() instance.last_seen_row_key = b"a" @@ -470,6 +491,7 @@ def test_handle_last_scanned_row(self): def test__handle_complete_row(self): from google.cloud.bigtable.row import Row + instance = self._make_one() instance.current_state = mock.Mock() instance.current_family = "family" @@ -485,33 +507,45 @@ def test__handle_complete_row(self): def test__handle_reset_chunk_errors(self): from google.cloud.bigtable.exceptions import InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() with pytest.raises(InvalidChunk) as e: instance._handle_reset_chunk(mock.Mock()) instance.current_state = mock.Mock() assert e.value.args[0] == "Reset chunk received when not processing row" with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(ReadRowsResponse.CellChunk(row_key=b"row_key")._pb) + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(row_key=b"row_key")._pb + ) assert e.value.args[0] == "Reset chunk has a row key" with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(ReadRowsResponse.CellChunk(family_name="family")._pb) + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(family_name="family")._pb + ) assert e.value.args[0] == "Reset chunk has a family name" with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(ReadRowsResponse.CellChunk(qualifier=b"qualifier")._pb) + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(qualifier=b"qualifier")._pb + ) assert e.value.args[0] == "Reset chunk has a qualifier" with pytest.raises(InvalidChunk) as e: - 
instance._handle_reset_chunk(ReadRowsResponse.CellChunk(timestamp_micros=1)._pb) + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(timestamp_micros=1)._pb + ) assert e.value.args[0] == "Reset chunk has a timestamp" with pytest.raises(InvalidChunk) as e: instance._handle_reset_chunk(ReadRowsResponse.CellChunk(value=b"value")._pb) assert e.value.args[0] == "Reset chunk has a value" with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(ReadRowsResponse.CellChunk(labels=["label"])._pb) + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(labels=["label"])._pb + ) assert e.value.args[0] == "Reset chunk has labels" def test_handle_chunk_out_of_order(self): from google.cloud.bigtable.exceptions import InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() instance.last_seen_row_key = b"b" with pytest.raises(InvalidChunk) as e: @@ -523,10 +557,10 @@ def test_handle_chunk_out_of_order(self): instance.handle_chunk(chunk) assert "increasing" in e.value.args[0] - def test_handle_chunk_reset(self): """Should call _handle_reset_chunk when a chunk with reset_row is encountered""" from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() with mock.patch.object(type(instance), "_handle_reset_chunk") as mock_reset: chunk = ReadRowsResponse.CellChunk(reset_row=True)._pb @@ -537,8 +571,11 @@ def test_handle_chunk_reset(self): @pytest.mark.parametrize("state", [AWAITING_NEW_ROW, AWAITING_CELL_VALUE]) def handle_chunk_with_commit_wrong_state(self, state): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = self._make_one() - with mock.patch.object(type(instance.current_state), "handle_chunk") as mock_state_handle: + with mock.patch.object( + type(instance.current_state), "handle_chunk" + ) as mock_state_handle: mock_state_handle.return_value = state(mock.Mock()) with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(commit_row=True)._pb @@ -549,9 +586,12 @@ def handle_chunk_with_commit_wrong_state(self, state): def test_handle_chunk_with_commit(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable.row import Row + instance = self._make_one() with mock.patch.object(type(instance), "_reset_row") as mock_reset: - chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=True)._pb + chunk = ReadRowsResponse.CellChunk( + row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=True + )._pb output = instance.handle_chunk(chunk) assert isinstance(output, Row) assert output.row_key == b"row_key" @@ -560,13 +600,15 @@ def test_handle_chunk_with_commit(self): assert instance.last_seen_row_key == b"row_key" assert mock_reset.call_count == 1 - def test_handle_chunk_with_commit_empty_strings(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable.row import Row + instance = self._make_one() with mock.patch.object(type(instance), "_reset_row") as mock_reset: - chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="", qualifier=b"", commit_row=True)._pb + chunk = ReadRowsResponse.CellChunk( + row_key=b"row_key", family_name="", qualifier=b"", commit_row=True + )._pb output = instance.handle_chunk(chunk) assert isinstance(output, Row) assert output.row_key == b"row_key" @@ -575,22 +617,24 @@ def test_handle_chunk_with_commit_empty_strings(self): assert instance.last_seen_row_key == b"row_key" 
assert mock_reset.call_count == 1 - def handle_chunk_incomplete(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.row import Row + instance = self._make_one() - chunk = ReadRowsResponse.CellChunk(row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=False)._pb + chunk = ReadRowsResponse.CellChunk( + row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=False + )._pb output = instance.handle_chunk(chunk) assert output is None assert isinstance(instance.current_state, AWAITING_CELL_VALUE) assert instance.current_family == "f" assert instance.current_qualifier == b"q" -class TestState(unittest.TestCase): +class TestState(unittest.TestCase): def test_AWAITING_NEW_ROW_empty_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = AWAITING_NEW_ROW(mock.Mock()) with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(row_key=b"")._pb @@ -607,10 +651,11 @@ def test_AWAITING_NEW_ROW(self): delegate the call to AWAITING_NEW_CELL """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + instance = AWAITING_NEW_ROW(mock.Mock()) with mock.patch.object(AWAITING_NEW_CELL, "handle_chunk") as mock_delegate: chunk = ReadRowsResponse.CellChunk(row_key=b"row_key")._pb - output = instance.handle_chunk(chunk) + instance.handle_chunk(chunk) assert instance._owner.adapter.start_row.call_count == 1 assert instance._owner.adapter.start_row.call_args[0][0] == b"row_key" mock_delegate.assert_called_once_with(chunk) @@ -618,6 +663,7 @@ def test_AWAITING_NEW_ROW(self): def test_AWAITING_NEW_CELL_family_without_qualifier(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() state_machine.current_qualifier = b"q" instance = AWAITING_NEW_CELL(state_machine) @@ -629,6 +675,7 @@ def test_AWAITING_NEW_CELL_family_without_qualifier(self): def test_AWAITING_NEW_CELL_qualifier_without_family(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_NEW_CELL(state_machine) with pytest.raises(InvalidChunk) as e: @@ -639,6 +686,7 @@ def test_AWAITING_NEW_CELL_qualifier_without_family(self): def test_AWAITING_NEW_CELL_no_row_state(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_NEW_CELL(state_machine) with pytest.raises(InvalidChunk) as e: @@ -654,6 +702,7 @@ def test_AWAITING_NEW_CELL_no_row_state(self): def test_AWAITING_NEW_CELL_invalid_row_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_NEW_CELL(state_machine) state_machine.adapter.current_key = b"abc" @@ -665,6 +714,7 @@ def test_AWAITING_NEW_CELL_invalid_row_key(self): def test_AWAITING_NEW_CELL_success_no_split(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() state_machine.adapter = mock.Mock() instance = AWAITING_NEW_CELL(state_machine) @@ -674,7 +724,14 @@ def test_AWAITING_NEW_CELL_success_no_split(self): labels = ["label"] timestamp = 123 value = b"value" - chunk = 
ReadRowsResponse.CellChunk(row_key=row_key, family_name=family, qualifier=qualifier, timestamp_micros=timestamp, value=value, labels=labels)._pb + chunk = ReadRowsResponse.CellChunk( + row_key=row_key, + family_name=family, + qualifier=qualifier, + timestamp_micros=timestamp, + value=value, + labels=labels, + )._pb state_machine.adapter.current_key = row_key new_state = instance.handle_chunk(chunk) assert state_machine.adapter.start_cell.call_count == 1 @@ -691,6 +748,7 @@ def test_AWAITING_NEW_CELL_success_no_split(self): def test_AWAITING_NEW_CELL_success_with_split(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() state_machine.adapter = mock.Mock() instance = AWAITING_NEW_CELL(state_machine) @@ -700,7 +758,15 @@ def test_AWAITING_NEW_CELL_success_with_split(self): labels = ["label"] timestamp = 123 value = b"value" - chunk = ReadRowsResponse.CellChunk(value_size=1, row_key=row_key, family_name=family, qualifier=qualifier, timestamp_micros=timestamp, value=value, labels=labels)._pb + chunk = ReadRowsResponse.CellChunk( + value_size=1, + row_key=row_key, + family_name=family, + qualifier=qualifier, + timestamp_micros=timestamp, + value=value, + labels=labels, + )._pb state_machine.adapter.current_key = row_key new_state = instance.handle_chunk(chunk) assert state_machine.adapter.start_cell.call_count == 1 @@ -717,6 +783,7 @@ def test_AWAITING_NEW_CELL_success_with_split(self): def test_AWAITING_CELL_VALUE_w_row_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_CELL_VALUE(state_machine) with pytest.raises(InvalidChunk) as e: @@ -724,9 +791,10 @@ def test_AWAITING_CELL_VALUE_w_row_key(self): instance.handle_chunk(chunk) assert "In progress cell had a row key" in e.value.args[0] - def test_AWAITING_CELL_VALUE_w_row_key(self): + def test_AWAITING_CELL_VALUE_w_family(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_CELL_VALUE(state_machine) with pytest.raises(InvalidChunk) as e: @@ -737,6 +805,7 @@ def test_AWAITING_CELL_VALUE_w_row_key(self): def test_AWAITING_CELL_VALUE_w_qualifier(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_CELL_VALUE(state_machine) with pytest.raises(InvalidChunk) as e: @@ -747,6 +816,7 @@ def test_AWAITING_CELL_VALUE_w_qualifier(self): def test_AWAITING_CELL_VALUE_w_timestamp(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_CELL_VALUE(state_machine) with pytest.raises(InvalidChunk) as e: @@ -757,6 +827,7 @@ def test_AWAITING_CELL_VALUE_w_timestamp(self): def test_AWAITING_CELL_VALUE_w_labels(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() instance = AWAITING_CELL_VALUE(state_machine) with pytest.raises(InvalidChunk) as e: @@ -767,6 +838,7 @@ def test_AWAITING_CELL_VALUE_w_labels(self): def test_AWAITING_CELL_VALUE_continuation(self): from google.cloud.bigtable_v2.types.bigtable 
import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() state_machine.adapter = mock.Mock() instance = AWAITING_CELL_VALUE(state_machine) @@ -781,6 +853,7 @@ def test_AWAITING_CELL_VALUE_continuation(self): def test_AWAITING_CELL_VALUE_final_chunk(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine + state_machine = _StateMachine() state_machine.adapter = mock.Mock() instance = AWAITING_CELL_VALUE(state_machine) @@ -944,11 +1017,12 @@ def test_reset(self): self.assertEqual(row_builder.working_value, None) self.assertEqual(len(row_builder.completed_cells), 0) -class TestChunkHasField(): +class TestChunkHasField: def test__chunk_has_field_empty(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _chunk_has_field + chunk = ReadRowsResponse.CellChunk()._pb assert not _chunk_has_field(chunk, "family_name") assert not _chunk_has_field(chunk, "qualifier") @@ -956,6 +1030,7 @@ def test__chunk_has_field_empty(self): def test__chunk_has_field_populated_empty_strings(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _chunk_has_field - chunk = ReadRowsResponse.CellChunk(qualifier=b'', family_name="")._pb + + chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb assert _chunk_has_field(chunk, "family_name") assert _chunk_has_field(chunk, "qualifier") From 50dc608c40b12ac751c28725cc579cec61a8b36d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 16:52:51 -0700 Subject: [PATCH 305/349] reverted pooled multicallable changes --- gapic-generator-fork | 2 +- .../transports/pooled_grpc_asyncio.py | 52 +++++++++++++------ 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index cd4627b0d..09948f5a8 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit cd4627b0de8a2273fab1081a0bedde20077ba60c +Subproject commit 09948f5a8fcf58fc5c6ee863e2e1ee6750bda51c diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index c9ab6c438..fdf3dd8a0 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -43,22 +43,40 @@ class PooledMultiCallable: - def __init__( - self, channel_pool: "PooledChannel", call_fn_name: str, *args, **kwargs - ): + def __init__(self, channel_pool: "PooledChannel", *args, **kwargs): self._init_args = args self._init_kwargs = kwargs self.next_channel_fn = channel_pool.next_channel - self.call_fn_name = call_fn_name - self._stubs: dict[aio.Channel, Callable] = {} - - def __call__(self, *args, **kwargs) -> aio.Call: - channel = self.next_channel_fn() - if channel not in self._stubs: - self._stubs[channel] = getattr(channel, self.call_fn_name)( - *self._init_args, **self._init_kwargs - ) - return self._stubs[channel](*args, **kwargs) + + +class PooledUnaryUnaryMultiCallable(PooledMultiCallable, aio.UnaryUnaryMultiCallable): + def __call__(self, *args, **kwargs) -> aio.UnaryUnaryCall: + return self.next_channel_fn().unary_unary( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) + + +class PooledUnaryStreamMultiCallable(PooledMultiCallable, 
aio.UnaryStreamMultiCallable): + def __call__(self, *args, **kwargs) -> aio.UnaryStreamCall: + return self.next_channel_fn().unary_stream( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) + + +class PooledStreamUnaryMultiCallable(PooledMultiCallable, aio.StreamUnaryMultiCallable): + def __call__(self, *args, **kwargs) -> aio.StreamUnaryCall: + return self.next_channel_fn().stream_unary( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) + + +class PooledStreamStreamMultiCallable( + PooledMultiCallable, aio.StreamStreamMultiCallable +): + def __call__(self, *args, **kwargs) -> aio.StreamStreamCall: + return self.next_channel_fn().stream_stream( + *self._init_args, **self._init_kwargs + )(*args, **kwargs) class PooledChannel(aio.Channel): @@ -96,16 +114,16 @@ def next_channel(self) -> aio.Channel: return channel def unary_unary(self, *args, **kwargs) -> grpc.aio.UnaryUnaryMultiCallable: - return PooledMultiCallable(self, "unary_unary", *args, **kwargs) + return PooledUnaryUnaryMultiCallable(self, *args, **kwargs) def unary_stream(self, *args, **kwargs) -> grpc.aio.UnaryStreamMultiCallable: - return PooledMultiCallable(self, "unary_stream", *args, **kwargs) + return PooledUnaryStreamMultiCallable(self, *args, **kwargs) def stream_unary(self, *args, **kwargs) -> grpc.aio.StreamUnaryMultiCallable: - return PooledMultiCallable(self, "stream_unary", *args, **kwargs) + return PooledStreamUnaryMultiCallable(self, *args, **kwargs) def stream_stream(self, *args, **kwargs) -> grpc.aio.StreamStreamMultiCallable: - return PooledMultiCallable(self, "stream_stream", *args, **kwargs) + return PooledStreamStreamMultiCallable(self, *args, **kwargs) async def close(self, grace=None): close_fns = [channel.close(grace=grace) for channel in self._pool] From b11675585ea059a27b7d5db21b52e825eb84e0ff Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 17:19:47 -0700 Subject: [PATCH 306/349] pass scopes to created channels --- gapic-generator-fork | 2 +- .../bigtable/transports/pooled_grpc_asyncio.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/gapic-generator-fork b/gapic-generator-fork index 09948f5a8..b26cda7d1 160000 --- a/gapic-generator-fork +++ b/gapic-generator-fork @@ -1 +1 @@ -Subproject commit 09948f5a8fcf58fc5c6ee863e2e1ee6750bda51c +Subproject commit b26cda7d163d6e0d45c9684f328ca32fb49b799a diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py index fdf3dd8a0..372e5796d 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/pooled_grpc_asyncio.py @@ -86,8 +86,10 @@ def __init__( host: str = "bigtable.googleapis.com", credentials: Optional[ga_credentials.Credentials] = None, credentials_file: Optional[str] = None, - scopes: Optional[Sequence[str]] = None, quota_project_id: Optional[str] = None, + default_scopes: Optional[Sequence[str]] = None, + scopes: Optional[Sequence[str]] = None, + default_host: Optional[str] = None, insecure: bool = False, **kwargs, ): @@ -101,8 +103,10 @@ def __init__( target=host, credentials=credentials, credentials_file=credentials_file, - scopes=scopes, quota_project_id=quota_project_id, + default_scopes=default_scopes, + scopes=scopes, + default_host=default_host, **kwargs, ) for i in range(pool_size): @@ -249,8 +253,10 @@ def create_channel( host, credentials=credentials, 
credentials_file=credentials_file, - scopes=scopes, quota_project_id=quota_project_id, + default_scopes=cls.AUTH_SCOPES, + scopes=scopes, + default_host=cls.DEFAULT_HOST, **kwargs, ) From ec5eb071849b130e8d467d5a7f44ddf922ba4746 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 18:13:23 -0700 Subject: [PATCH 307/349] added basic ping system test --- noxfile.py | 2 ++ tests/system/test_system.py | 45 +++++++++++++++++++++++++++++++++++++ tests/unit/test_client.py | 13 ++++++++--- 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 tests/system/test_system.py diff --git a/noxfile.py b/noxfile.py index ed69bf85e..035599844 100644 --- a/noxfile.py +++ b/noxfile.py @@ -49,6 +49,7 @@ SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "mock", "pytest", + "pytest-asyncio", "google-cloud-testutils", ] SYSTEM_TEST_EXTERNAL_DEPENDENCIES = [] @@ -306,6 +307,7 @@ def system(session): "py.test", "--quiet", f"--junitxml=system_{session.python}_sponge_log.xml", + "--ignore=tests/system/v2_client", system_test_folder_path, *session.posargs, ) diff --git a/tests/system/test_system.py b/tests/system/test_system.py new file mode 100644 index 000000000..09cd66e0a --- /dev/null +++ b/tests/system/test_system.py @@ -0,0 +1,45 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +import pytest_asyncio + + +@pytest_asyncio.fixture +async def client(): + from google.cloud.bigtable import BigtableDataClient + + project = "sanche-testing-project" + async with BigtableDataClient(project=project) as client: + yield client + + +@pytest_asyncio.fixture +async def table(client): + instance = "sanche-test" + table = "random" + async with client.get_table(instance, table) as table: + yield table + + +@pytest.mark.asyncio +async def test_ping_and_warm_gapic(client, table): + """ + Simple ping rpc test + This test ensures channels are able to authenticate with backend + """ + request = { + "name": client._gapic_client.instance_path(client.project, table.instance) + } + await client._gapic_client.ping_and_warm(request) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f82750764..04d750a06 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -586,7 +586,9 @@ async def test__remove_instance_registration(): async def test__multiple_table_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: - instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._active_instances) == 1 assert id(table_1) in client._instance_owners[instance_1_path] @@ -609,8 +611,12 @@ async def test__multiple_instance_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: async with client.get_table("instance_2", "table_2") as table_2: - instance_1_path = client._gapic_client.instance_path(client.project, "instance_1") - instance_2_path = client._gapic_client.instance_path(client.project, "instance_2") + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance_2" + ) assert len(client._instance_owners[instance_1_path]) == 1 assert len(client._instance_owners[instance_2_path]) == 1 assert len(client._active_instances) == 2 @@ -658,6 +664,7 @@ async def test_get_table(): @pytest.mark.asyncio async def test_get_table_context_manager(): from google.cloud.bigtable.client import Table + expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" From 55cdcc2cda20b0a048353fa4f434b03cc1ed833c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 20 Apr 2023 18:31:29 -0700 Subject: [PATCH 308/349] keep both the names and ids in table object --- google/cloud/bigtable/client.py | 15 +++++++++---- tests/system/test_system.py | 4 +--- tests/unit/test_client.py | 40 ++++++++++++++++++++------------- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index dacf864bf..dfd8b16cd 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -333,15 +333,22 @@ def __init__( instance_id: The Bigtable instance ID to associate with this client. instance_id is combined with the client's project to fully specify the instance - table_id: The ID of the table. + table_id: The ID of the table. table_id is combined with the + instance_id and the client's project to fully specify the table app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles Raises: - RuntimeError if called outside of an async context (no running event loop) """ self.client = client - self.instance = instance_id + self.instance_id = instance_id + self.instance_name = self.client._gapic_client.instance_path( + self.client.project, instance_id + ) self.table_id = table_id + self.table_name = self.client._gapic_client.table_path( + self.client.project, instance_id, table_id + ) self.app_profile_id = app_profile_id # raises RuntimeError if called outside of an async context (no running event loop) try: @@ -681,7 +688,7 @@ async def close(self): """ Called to close the Table instance and release any resources held by it. """ - await self.client._remove_instance_registration(self.instance, self) + await self.client._remove_instance_registration(self.instance_id, self) async def __aenter__(self): """ @@ -690,7 +697,7 @@ async def __aenter__(self): Register this instance with the client, so that grpc channels will be warmed for the specified instance """ - await self.client._register_instance(self.instance, self) + await self.client._register_instance(self.instance_id, self) return self async def __aexit__(self, exc_type, exc_val, exc_tb): diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 09cd66e0a..04231a01e 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -39,7 +39,5 @@ async def test_ping_and_warm_gapic(client, table): Simple ping rpc test This test ensures channels are able to authenticate with backend """ - request = { - "name": client._gapic_client.instance_path(client.project, table.instance) - } + request = {"name": table.instance_name} await client._gapic_client.ping_and_warm(request) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 04d750a06..b1af183a1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -651,13 +651,18 @@ async def test_get_table(): await asyncio.sleep(0) assert isinstance(table, Table) assert table.table_id == expected_table_id - assert table.instance == expected_instance_id + assert ( + table.table_name + == f"projects/{client.project}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == expected_instance_id + assert ( + table.instance_name + == f"projects/{client.project}/instances/{expected_instance_id}" + ) assert table.app_profile_id == expected_app_profile_id assert table.client is client - full_instance_name = client._gapic_client.instance_path( - client.project, expected_instance_id - ) - assert full_instance_name in client._active_instances + assert table.instance_name in client._active_instances await client.close() @@ -668,9 +673,10 @@ async def test_get_table_context_manager(): expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" + expected_project_id = "project-id" with mock.patch.object(Table, "close") as close_mock: - async with _make_one(project="project-id") as client: + async with _make_one(project=expected_project_id) as client: async with client.get_table( expected_instance_id, expected_table_id, @@ -679,13 +685,18 @@ async def test_get_table_context_manager(): await asyncio.sleep(0) assert isinstance(table, Table) assert table.table_id == expected_table_id - assert table.instance == expected_instance_id + assert ( + table.table_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == 
expected_instance_id + assert ( + table.instance_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}" + ) assert table.app_profile_id == expected_app_profile_id assert table.client is client - full_instance_name = client._gapic_client.instance_path( - client.project, expected_instance_id - ) - assert full_instance_name in client._active_instances + assert table.instance_name in client._active_instances assert close_mock.call_count == 1 @@ -800,13 +811,10 @@ async def test_table_ctor(): ) await asyncio.sleep(0) assert table.table_id == expected_table_id - assert table.instance == expected_instance_id + assert table.instance_id == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client - full_instance_name = client._gapic_client.instance_path( - client.project, expected_instance_id - ) - assert full_instance_name in client._active_instances + assert table.instance_name in client._active_instances # ensure task reaches completion await table._register_instance_task assert table._register_instance_task.done() From 385533368e12afc6d2e74d07699145614a7d3396 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:01:06 -0700 Subject: [PATCH 309/349] added api-core to noxfile tests --- noxfile.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index fdc537a45..ebce695d0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -40,7 +40,7 @@ "pytest-asyncio", ] UNIT_TEST_EXTERNAL_DEPENDENCIES = [] -UNIT_TEST_LOCAL_DEPENDENCIES = [] +UNIT_TEST_LOCAL_DEPENDENCIES = ["python-api-core"] UNIT_TEST_DEPENDENCIES = [] UNIT_TEST_EXTRAS = [] UNIT_TEST_EXTRAS_BY_PYTHON = {} @@ -53,7 +53,7 @@ "google-cloud-testutils", ] SYSTEM_TEST_EXTERNAL_DEPENDENCIES = [] -SYSTEM_TEST_LOCAL_DEPENDENCIES = [] +SYSTEM_TEST_LOCAL_DEPENDENCIES = ["python-api-core"] SYSTEM_TEST_DEPENDENCIES = [] SYSTEM_TEST_EXTRAS = [] SYSTEM_TEST_EXTRAS_BY_PYTHON = {} @@ -166,7 +166,7 @@ def install_unittest_dependencies(session, *constraints): session.install(*UNIT_TEST_EXTERNAL_DEPENDENCIES, *constraints) if UNIT_TEST_LOCAL_DEPENDENCIES: - session.install(*UNIT_TEST_LOCAL_DEPENDENCIES, *constraints) + session.install("-e", *UNIT_TEST_LOCAL_DEPENDENCIES, *constraints) if UNIT_TEST_EXTRAS_BY_PYTHON: extras = UNIT_TEST_EXTRAS_BY_PYTHON.get(session.python, []) @@ -179,7 +179,6 @@ def install_unittest_dependencies(session, *constraints): session.install("-e", f".[{','.join(extras)}]", *constraints) else: session.install("-e", ".", *constraints) - session.install("-e", "./python-api-core", *constraints) def default(session): From 213519e83a3e415560ce6cf9ff71c9525cc22690 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:13:13 -0700 Subject: [PATCH 310/349] added basic read rows stream to system tests --- tests/system/test_system.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 04231a01e..0775d79e9 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -14,21 +14,22 @@ import pytest import pytest_asyncio +import os @pytest_asyncio.fixture async def client(): from google.cloud.bigtable import BigtableDataClient - project = "sanche-testing-project" + project = os.getenv("GOOGLE_CLOUD_PROJECT") or None async with BigtableDataClient(project=project) as client: yield client @pytest_asyncio.fixture async def table(client): - instance = "sanche-test" - table = "random" + instance = 
os.getenv("BIGTABLE_TEST_INSTANCE") or "test-instance" + table = os.getenv("BIGTABLE_TEST_TABLE") or "test-table" async with client.get_table(instance, table) as table: yield table @@ -41,3 +42,17 @@ async def test_ping_and_warm_gapic(client, table): """ request = {"name": table.instance_name} await client._gapic_client.ping_and_warm(request) + +@pytest.mark.asyncio +async def test_read_rows_stream(table): + """ + Ensure that the read_rows_stream method works + """ + from google.cloud.bigtable import ReadRowsQuery + + query = ReadRowsQuery() + generator = await table.read_rows_stream(query) + first_row = await generator.__anext__() + print(first_row) + async for row in generator: + print(row) From 9e3b41109a606e81bb1801e4de35c9ac435cf564 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:14:17 -0700 Subject: [PATCH 311/349] pull project details out of env vars --- tests/system/test_system.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 04231a01e..7f06d55df 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -14,21 +14,22 @@ import pytest import pytest_asyncio +import os @pytest_asyncio.fixture async def client(): from google.cloud.bigtable import BigtableDataClient - project = "sanche-testing-project" + project = os.getenv("GOOGLE_CLOUD_PROJECT") or None async with BigtableDataClient(project=project) as client: yield client @pytest_asyncio.fixture async def table(client): - instance = "sanche-test" - table = "random" + instance = os.getenv("BIGTABLE_TEST_INSTANCE") or "test-instance" + table = os.getenv("BIGTABLE_TEST_TABLE") or "test-table" async with client.get_table(instance, table) as table: yield table From d8cf15811a2c50aa81a227d5c4d21a9b9cab6399 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:46:00 -0700 Subject: [PATCH 312/349] added automatic row creation for system tests --- tests/system/test_system.py | 60 +++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 0775d79e9..41cb6c2cf 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -34,6 +34,49 @@ async def table(client): yield table +class TempRowBuilder: + """ + Used to add rows to a table for testing purposes. 
+ """ + def __init__(self, table): + self.rows = [] + self.table = table + + async def add_row(self, row_key, family, qualifier, value): + request = { + "table_name": self.table.table_name, + "row_key": row_key, + "mutations": [ + { + "set_cell": { + "family_name": family, + "column_qualifier": qualifier, + "value": value, + } + } + ], + } + await self.table.client._gapic_client.mutate_row(request) + self.rows.append(row_key) + + async def delete_rows(self): + request = { + "table_name": self.table.table_name, + "entries": [ + {"row_key": row, "mutations": [{"delete_from_row": {}}]} + for row in self.rows + ], + } + await self.table.client._gapic_client.mutate_rows(request) + + +@pytest_asyncio.fixture(scope="function") +async def temp_rows(table): + builder = TempRowBuilder(table) + yield builder + await builder.delete_rows() + + @pytest.mark.asyncio async def test_ping_and_warm_gapic(client, table): """ @@ -43,16 +86,21 @@ async def test_ping_and_warm_gapic(client, table): request = {"name": table.instance_name} await client._gapic_client.ping_and_warm(request) + @pytest.mark.asyncio -async def test_read_rows_stream(table): +async def test_read_rows_stream(table, temp_rows): """ Ensure that the read_rows_stream method works """ from google.cloud.bigtable import ReadRowsQuery - query = ReadRowsQuery() - generator = await table.read_rows_stream(query) + await temp_rows.add_row(b"row_key_1", "cf1", "c1", b"value1") + await temp_rows.add_row(b"row_key_2", "cf1", "c1", b"value2") + + generator = await table.read_rows_stream({}) first_row = await generator.__anext__() - print(first_row) - async for row in generator: - print(row) + second_row = await generator.__anext__() + assert first_row.row_key == b"row_key_1" + assert second_row.row_key == b"row_key_2" + with pytest.raises(StopAsyncIteration): + await generator.__anext__() From c9b82176eba98ebbe96cb80ed871d32ba3323a52 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:47:53 -0700 Subject: [PATCH 313/349] added read_rows non stream --- tests/system/test_system.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 41cb6c2cf..695895ae3 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -92,11 +92,10 @@ async def test_read_rows_stream(table, temp_rows): """ Ensure that the read_rows_stream method works """ - from google.cloud.bigtable import ReadRowsQuery - await temp_rows.add_row(b"row_key_1", "cf1", "c1", b"value1") await temp_rows.add_row(b"row_key_2", "cf1", "c1", b"value2") + # full table scan generator = await table.read_rows_stream({}) first_row = await generator.__anext__() second_row = await generator.__anext__() @@ -104,3 +103,17 @@ async def test_read_rows_stream(table, temp_rows): assert second_row.row_key == b"row_key_2" with pytest.raises(StopAsyncIteration): await generator.__anext__() + + +@pytest.mark.asyncio +async def test_read_rows(table, temp_rows): + """ + Ensure that the read_rows method works + """ + await temp_rows.add_row(b"row_key_1", "cf1", "c1", b"value1") + await temp_rows.add_row(b"row_key_2", "cf1", "c1", b"value2") + # full table scan + row_list = await table.read_rows({}) + assert len(row_list) == 2 + assert row_list[0].row_key == b"row_key_1" + assert row_list[1].row_key == b"row_key_2" From 500eff0e1b99dc1ace9a7ba75aa5495a9083c238 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 10:50:53 -0700 Subject: [PATCH 314/349] added range query system test 
--- tests/system/test_system.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 695895ae3..255b2d80f 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -117,3 +117,22 @@ async def test_read_rows(table, temp_rows): assert len(row_list) == 2 assert row_list[0].row_key == b"row_key_1" assert row_list[1].row_key == b"row_key_2" + + +@pytest.mark.asyncio +async def test_read_rows_range_query(table, temp_rows): + """ + Ensure that the read_rows method works + """ + from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable import RowRange + await temp_rows.add_row(b"a", "cf1", "c1", b"value1") + await temp_rows.add_row(b"b", "cf1", "c1", b"value2") + await temp_rows.add_row(b"c", "cf1", "c1", b"value2") + await temp_rows.add_row(b"d", "cf1", "c1", b"value2") + # full table scan + query = ReadRowsQuery(row_ranges=RowRange(start_key=b"b", end_key=b"d")) + row_list = await table.read_rows(query) + assert len(row_list) == 2 + assert row_list[0].row_key == b"b" + assert row_list[1].row_key == b"c" From 27130f085a7c17205ce230b17910d37a318849f3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 11:36:46 -0700 Subject: [PATCH 315/349] added logic for temporary test tables and instances --- tests/system/test_system.py | 122 +++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 15 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 255b2d80f..0e418adca 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -15,9 +15,98 @@ import pytest import pytest_asyncio import os +import asyncio +TEST_FAMILY = "test-family" +TEST_FAMILY_2 = "test-family-2" -@pytest_asyncio.fixture +@pytest.fixture(scope="session") +def event_loop(): + return asyncio.get_event_loop() + +@pytest.fixture(scope="session") +def instance_admin_client(): + """Client for interacting with the Instance Admin API.""" + from google.cloud.bigtable_admin_v2 import BigtableInstanceAdminClient + with BigtableInstanceAdminClient() as client: + yield client + +@pytest.fixture(scope="session") +def table_admin_client(): + """Client for interacting with the Table Admin API.""" + from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient + with BigtableTableAdminClient() as client: + yield client + +@pytest.fixture(scope="session") +def instance_id(instance_admin_client, project_id): + """ + Returns BIGTABLE_TEST_INSTANCE if set, otherwise creates a new temporary instance for the test session + """ + from google.cloud.bigtable_admin_v2 import types + from google.api_core import exceptions + # use user-specified instance if available + user_specified_instance = os.getenv("BIGTABLE_TEST_INSTANCE") + if user_specified_instance: + print("Using user-specified instance: {}".format(user_specified_instance)) + yield user_specified_instance + return + + # create a new temporary test instance + instance_id = "test-instance" + try: + operation =instance_admin_client.create_instance( + parent=f"projects/{project_id}", + instance_id=instance_id, + instance=types.Instance( + display_name="Test Instance", + labels={"python-system-test": "true"}, + ), + clusters={ + "test-cluster": types.Cluster( + location=f"projects/{project_id}/locations/us-central1-b", + serve_nodes=3, + ) + }, + ) + operation.result(timeout=240) + except exceptions.AlreadyExists: + pass + yield instance_id + 
instance_admin_client.delete_instance(name=f"projects/{project_id}/instances/{instance_id}") + +@pytest.fixture(scope="session") +def table_id(table_admin_client, project_id, instance_id): + """ + Returns BIGTABLE_TEST_TABLE if set, otherwise creates a new temporary table for the test session + """ + from google.cloud.bigtable_admin_v2 import types + from google.api_core import exceptions + from google.api_core import retry + # use user-specified instance if available + user_specified_table = os.getenv("BIGTABLE_TEST_TABLE") + if user_specified_table: + print("Using user-specified table: {}".format(user_specified_table)) + yield user_specified_table + return + + table_id = "test-table" + retry = retry.Retry(predicate=retry.if_exception_type(exceptions.FailedPrecondition)) + try: + table_admin_client.create_table( + parent=f"projects/{project_id}/instances/{instance_id}", + table_id=table_id, + table=types.Table( + column_families={TEST_FAMILY: types.ColumnFamily(), TEST_FAMILY_2: types.ColumnFamily()}, + ), + retry=retry, + ) + except exceptions.AlreadyExists: + pass + yield table_id + table_admin_client.delete_table(name=f"projects/{project_id}/instances/{instance_id}/tables/{table_id}") + +@pytest_asyncio.fixture(scope="session") async def client(): from google.cloud.bigtable import BigtableDataClient @@ -25,12 +114,15 @@ async def client(): async with BigtableDataClient(project=project) as client: yield client +@pytest.fixture(scope="session") +def project_id(client): + """Returns the project ID from the client.""" + yield client.project + -@pytest_asyncio.fixture -async def table(client): - instance = os.getenv("BIGTABLE_TEST_INSTANCE") or "test-instance" - table = os.getenv("BIGTABLE_TEST_TABLE") or "test-table" - async with client.get_table(instance, table) as table: +@pytest_asyncio.fixture(scope="session") +async def table(client, table_id, instance_id): + async with client.get_table(instance_id, table_id) as table: yield table @@ -42,7 +134,7 @@ def __init__(self, table): self.rows = [] self.table = table - async def add_row(self, row_key, family, qualifier, value): + async def add_row(self, row_key, family=TEST_FAMILY, qualifier=b"q", value=b"test-value"): request = { "table_name": self.table.table_name, "row_key": row_key, @@ -92,8 +184,8 @@ async def test_read_rows_stream(table, temp_rows): """ Ensure that the read_rows_stream method works """ - await temp_rows.add_row(b"row_key_1", "cf1", "c1", b"value1") - await temp_rows.add_row(b"row_key_2", "cf1", "c1", b"value2") + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") # full table scan generator = await table.read_rows_stream({}) @@ -110,8 +202,8 @@ async def test_read_rows(table, temp_rows): """ Ensure that the read_rows method works """ - await temp_rows.add_row(b"row_key_1", "cf1", "c1", b"value1") - await temp_rows.add_row(b"row_key_2", "cf1", "c1", b"value2") + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") # full table scan row_list = await table.read_rows({}) assert len(row_list) == 2 @@ -126,10 +218,10 @@ async def test_read_rows_range_query(table, temp_rows): """ from google.cloud.bigtable import ReadRowsQuery from google.cloud.bigtable import RowRange - await temp_rows.add_row(b"a", "cf1", "c1", b"value1") - await temp_rows.add_row(b"b", "cf1", "c1", b"value2") - await temp_rows.add_row(b"c", "cf1", "c1", b"value2") - await temp_rows.add_row(b"d", "cf1", "c1", b"value2") + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + await 
temp_rows.add_row(b"c") + await temp_rows.add_row(b"d") # full table scan query = ReadRowsQuery(row_ranges=RowRange(start_key=b"b", end_key=b"d")) row_list = await table.read_rows(query) From f4f4facfb783d20b4860e53cdbe7cd6c9323c3f4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 12:03:03 -0700 Subject: [PATCH 316/349] made iterator active into a property --- google/cloud/bigtable/iterators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index c47904efb..1547c5c02 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -60,6 +60,7 @@ async def _start_idle_timer(self, idle_timeout: float): if sys.version_info >= (3, 8): self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" + @property def active(self): """ Returns True if the iterator is still active and has not been closed @@ -71,12 +72,12 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): Coroutine that will cancel a stream if no interaction with the iterator in the last `idle_timeout` seconds. """ - while self.active(): + while self.active: next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) if ( self.last_interaction_time + idle_timeout < time.time() - and self.active() + and self.active ): # idle timeout has expired await self._finish_with_error( From 06dee54fa77957ff3cc62b77841ea03145a65420 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 12:03:20 -0700 Subject: [PATCH 317/349] added more read_rows system tests --- tests/system/test_system.py | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 0e418adca..8122a282b 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -228,3 +228,58 @@ async def test_read_rows_range_query(table, temp_rows): assert len(row_list) == 2 assert row_list[0].row_key == b"b" assert row_list[1].row_key == b"c" + +@pytest.mark.asyncio +async def test_read_rows_key_query(table, temp_rows): + """ + Ensure that the read_rows method works + """ + from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable import RowRange + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + await temp_rows.add_row(b"c") + await temp_rows.add_row(b"d") + # full table scan + query = ReadRowsQuery(row_keys=[b"a", b"c"]) + row_list = await table.read_rows(query) + assert len(row_list) == 2 + assert row_list[0].row_key == b"a" + assert row_list[1].row_key == b"c" + +@pytest.mark.asyncio +async def test_read_rows_stream_close(table, temp_rows): + """ + Ensure that the read_rows_stream can be closed + """ + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") + + # full table scan + generator = await table.read_rows_stream({}) + first_row = await generator.__anext__() + assert first_row.row_key == b"row_key_1" + await generator.aclose() + assert generator.active is False + with pytest.raises(StopAsyncIteration) as e: + await generator.__anext__() + assert "closed" in str(e) + + +@pytest.mark.asyncio +async def test_read_rows_stream_inactive_timer(table, temp_rows): + """ + Ensure that the read_rows_stream method works + """ + from google.cloud.bigtable.exceptions import IdleTimeout + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") + + generator = await table.read_rows_stream({}) + await 
generator._start_idle_timer(0.05) + await asyncio.sleep(0.2) + assert generator.active is False + with pytest.raises(IdleTimeout) as e: + await generator.__anext__() + assert "inactivity" in str(e) + assert "idle_timeout=0.1" in str(e) From 9e11f88e4be9a55fb75d48f84969d1499b24cf26 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 12:06:29 -0700 Subject: [PATCH 318/349] fixed lint issues --- google/cloud/bigtable/iterators.py | 5 +--- tests/system/test_system.py | 41 ++++++++++++++++++++++++----- tests/unit/test_client_read_rows.py | 2 +- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 1547c5c02..6cbaa81bb 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -75,10 +75,7 @@ async def _idle_timeout_coroutine(self, idle_timeout: float): while self.active: next_timeout = self.last_interaction_time + idle_timeout await asyncio.sleep(next_timeout - time.time()) - if ( - self.last_interaction_time + idle_timeout < time.time() - and self.active - ): + if self.last_interaction_time + idle_timeout < time.time() and self.active: # idle timeout has expired await self._finish_with_error( IdleTimeout( diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 8122a282b..543a14725 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -20,24 +20,30 @@ TEST_FAMILY = "test-family" TEST_FAMILY_2 = "test-family-2" + @pytest.fixture(scope="session") def event_loop(): return asyncio.get_event_loop() + @pytest.fixture(scope="session") def instance_admin_client(): """Client for interacting with the Instance Admin API.""" from google.cloud.bigtable_admin_v2 import BigtableInstanceAdminClient + with BigtableInstanceAdminClient() as client: yield client + @pytest.fixture(scope="session") def table_admin_client(): """Client for interacting with the Table Admin API.""" from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient + with BigtableTableAdminClient() as client: yield client + @pytest.fixture(scope="session") def instance_id(instance_admin_client, project_id): """ @@ -45,6 +51,7 @@ def instance_id(instance_admin_client, project_id): """ from google.cloud.bigtable_admin_v2 import types from google.api_core import exceptions + # use user-specified instance if available user_specified_instance = os.getenv("BIGTABLE_TEST_INSTANCE") if user_specified_instance: @@ -55,7 +62,7 @@ def instance_id(instance_admin_client, project_id): # create a new temporary test instance instance_id = "test-instance" try: - operation =instance_admin_client.create_instance( + operation = instance_admin_client.create_instance( parent=f"projects/{project_id}", instance_id=instance_id, instance=types.Instance( @@ -73,7 +80,10 @@ def instance_id(instance_admin_client, project_id): except exceptions.AlreadyExists: pass yield instance_id - instance_admin_client.delete_instance(name=f"projects/{project_id}/instances/{instance_id}") + instance_admin_client.delete_instance( + name=f"projects/{project_id}/instances/{instance_id}" + ) + @pytest.fixture(scope="session") def table_id(table_admin_client, project_id, instance_id): @@ -83,6 +93,7 @@ def table_id(table_admin_client, project_id, instance_id): from google.cloud.bigtable_admin_v2 import types from google.api_core import exceptions from google.api_core import retry + # use user-specified instance if available user_specified_table = os.getenv("BIGTABLE_TEST_TABLE") if user_specified_table: @@ 
-91,20 +102,28 @@ def table_id(table_admin_client, project_id, instance_id): return table_id = "test-table" - retry = retry.Retry(predicate=retry.if_exception_type(exceptions.FailedPrecondition)) + retry = retry.Retry( + predicate=retry.if_exception_type(exceptions.FailedPrecondition) + ) try: table_admin_client.create_table( parent=f"projects/{project_id}/instances/{instance_id}", table_id=table_id, table=types.Table( - column_families={TEST_FAMILY: types.ColumnFamily(), TEST_FAMILY_2: types.ColumnFamily()}, + column_families={ + TEST_FAMILY: types.ColumnFamily(), + TEST_FAMILY_2: types.ColumnFamily(), + }, ), retry=retry, ) except exceptions.AlreadyExists: pass yield table_id - table_admin_client.delete_table(name=f"projects/{project_id}/instances/{instance_id}/tables/{table_id}") + table_admin_client.delete_table( + name=f"projects/{project_id}/instances/{instance_id}/tables/{table_id}" + ) + @pytest_asyncio.fixture(scope="session") async def client(): @@ -114,6 +133,7 @@ async def client(): async with BigtableDataClient(project=project) as client: yield client + @pytest.fixture(scope="session") def project_id(client): """Returns the project ID from the client.""" @@ -130,11 +150,14 @@ class TempRowBuilder: """ Used to add rows to a table for testing purposes. """ + def __init__(self, table): self.rows = [] self.table = table - async def add_row(self, row_key, family=TEST_FAMILY, qualifier=b"q", value=b"test-value"): + async def add_row( + self, row_key, family=TEST_FAMILY, qualifier=b"q", value=b"test-value" + ): request = { "table_name": self.table.table_name, "row_key": row_key, @@ -218,6 +241,7 @@ async def test_read_rows_range_query(table, temp_rows): """ from google.cloud.bigtable import ReadRowsQuery from google.cloud.bigtable import RowRange + await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") await temp_rows.add_row(b"c") @@ -229,13 +253,14 @@ async def test_read_rows_range_query(table, temp_rows): assert row_list[0].row_key == b"b" assert row_list[1].row_key == b"c" + @pytest.mark.asyncio async def test_read_rows_key_query(table, temp_rows): """ Ensure that the read_rows method works """ from google.cloud.bigtable import ReadRowsQuery - from google.cloud.bigtable import RowRange + await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") await temp_rows.add_row(b"c") @@ -247,6 +272,7 @@ async def test_read_rows_key_query(table, temp_rows): assert row_list[0].row_key == b"a" assert row_list[1].row_key == b"c" + @pytest.mark.asyncio async def test_read_rows_stream_close(table, temp_rows): """ @@ -272,6 +298,7 @@ async def test_read_rows_stream_inactive_timer(table, temp_rows): Ensure that the read_rows_stream method works """ from google.cloud.bigtable.exceptions import IdleTimeout + await temp_rows.add_row(b"row_key_1") await temp_rows.add_row(b"row_key_2") diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 8dd9ccb66..e18aeca9a 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -320,7 +320,7 @@ async def test_read_rows_idle_timeout(): await gen.__anext__() await asyncio.sleep(0.2) # generator should be expired - assert not gen.active() + assert not gen.active assert type(gen._merger_or_error) == IdleTimeout assert gen._idle_timeout_task is None await client.close() From 794c55a8777c6191cabf1d7902161598d573564e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 13:51:52 -0700 Subject: [PATCH 319/349] added iterator tests --- google/cloud/bigtable/iterators.py | 5 +- 
tests/unit/test_iterators.py | 266 +++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+), 3 deletions(-) create mode 100644 tests/unit/test_iterators.py diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 6cbaa81bb..086d4e7e3 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -65,7 +65,7 @@ def active(self): """ Returns True if the iterator is still active and has not been closed """ - return isinstance(self._merger_or_error, _ReadRowsOperation) + return not isinstance(self._merger_or_error, Exception) async def _idle_timeout_coroutine(self, idle_timeout: float): """ @@ -132,9 +132,8 @@ async def _finish_with_error(self, e: Exception): Helper function to close the stream and clean up resources after an error has occurred. """ - if isinstance(self._merger_or_error, _ReadRowsOperation): + if self.active: await self._merger_or_error.aclose() - del self._merger_or_error self._merger_or_error = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() diff --git a/tests/unit/test_iterators.py b/tests/unit/test_iterators.py new file mode 100644 index 000000000..3948857d2 --- /dev/null +++ b/tests/unit/test_iterators.py @@ -0,0 +1,266 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import asyncio + +import pytest + +from google.cloud.bigtable._read_rows import _ReadRowsOperation + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore + + +class MockStream(_ReadRowsOperation): + """ + Mock a _ReadRowsOperation stream for testing + """ + + def __init__(self, items=None, errors=None, operation_timeout=None): + self.transient_errors = errors + self.operation_timeout = operation_timeout + self.next_idx = 0 + if items is None: + items = list(range(10)) + self.items = items + + def __aiter__(self): + return self + + async def __anext__(self): + if self.next_idx >= len(self.items): + raise StopAsyncIteration + item = self.items[self.next_idx] + self.next_idx += 1 + if isinstance(item, Exception): + raise item + return item + + async def aclose(self): + pass + + +class TestReadRowsIterator: + async def mock_stream(self, size=10): + for i in range(size): + yield i + + def _make_one(self, *args, **kwargs): + from google.cloud.bigtable.iterators import ReadRowsIterator + + stream = MockStream(*args, **kwargs) + return ReadRowsIterator(stream) + + def test_ctor(self): + with mock.patch("time.time", return_value=0): + iterator = self._make_one() + assert iterator.last_interaction_time == 0 + assert iterator._idle_timeout_task is None + assert iterator.request_stats is None + assert iterator.active is True + + def test___aiter__(self): + iterator = self._make_one() + assert iterator.__aiter__() is iterator + + @pytest.mark.asyncio + async def test__start_idle_timer(self): + """Should start timer coroutine""" + iterator = self._make_one() + expected_timeout = 10 + with mock.patch("time.time", return_value=1): + with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: + await iterator._start_idle_timer(expected_timeout) + assert mock_coro.call_count == 1 + assert mock_coro.call_args[0] == (expected_timeout,) + assert iterator.last_interaction_time == 1 + assert iterator._idle_timeout_task is not None + + @pytest.mark.asyncio + async def test__start_idle_timer_duplicate(self): + """Multiple calls should replace task""" + iterator = self._make_one() + with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: + await iterator._start_idle_timer(1) + first_task = iterator._idle_timeout_task + await iterator._start_idle_timer(2) + second_task = iterator._idle_timeout_task + assert mock_coro.call_count == 2 + + assert first_task is not None + assert first_task != second_task + # old tasks hould be cancelled + with pytest.raises(asyncio.CancelledError): + await first_task + # new task should not be cancelled + await second_task + + @pytest.mark.asyncio + async def test__idle_timeout_coroutine(self): + from google.cloud.bigtable.exceptions import IdleTimeout + + iterator = self._make_one() + await iterator._idle_timeout_coroutine(0.05) + await asyncio.sleep(0.1) + assert iterator.active is False + with pytest.raises(IdleTimeout): + await iterator.__anext__() + + @pytest.mark.asyncio + async def test__idle_timeout_coroutine_extensions(self): + """touching the generator should reset the idle timer""" + iterator = self._make_one(items=list(range(100))) + await iterator._start_idle_timer(0.05) + for i in range(10): + # will not expire as long as it is in use + assert iterator.active is True + await iterator.__anext__() + await 
asyncio.sleep(0.03) + # now let it expire + await asyncio.sleep(0.5) + assert iterator.active is False + + @pytest.mark.asyncio + async def test___anext__(self): + num_rows = 10 + iterator = self._make_one(items=list(range(num_rows))) + for i in range(num_rows): + assert await iterator.__anext__() == i + with pytest.raises(StopAsyncIteration): + await iterator.__anext__() + + @pytest.mark.asyncio + async def test___anext__with_request_stats(self): + """ + Request stats should not be yielded, but should be set on the iterator object + """ + from google.cloud.bigtable_v2.types import RequestStats + + stats = RequestStats() + items = [1, 2, stats, 3] + iterator = self._make_one(items=items) + assert await iterator.__anext__() == 1 + assert await iterator.__anext__() == 2 + assert iterator.request_stats is None + assert await iterator.__anext__() == 3 + with pytest.raises(StopAsyncIteration): + await iterator.__anext__() + assert iterator.request_stats == stats + + @pytest.mark.asyncio + async def test___anext__with_deadline_error(self): + """ + RetryErrors mean a deadline has been hit. + Should be wrapped in a DeadlineExceeded exception + """ + from google.api_core import exceptions as core_exceptions + + items = [1, core_exceptions.RetryError("retry error", None)] + expected_timeout = 99 + iterator = self._make_one(items=items, operation_timeout=expected_timeout) + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.DeadlineExceeded) as exc: + await iterator.__anext__() + assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( + exc.value + ) + assert exc.value.__cause__ is None + + @pytest.mark.asyncio + async def test___anext__with_deadline_error_with_cause(self): + """ + Transient errors should be exposed as an error group + """ + from google.api_core import exceptions as core_exceptions + from google.cloud.bigtable.exceptions import RetryExceptionGroup + + items = [1, core_exceptions.RetryError("retry error", None)] + expected_timeout = 99 + errors = [RuntimeError("error1"), ValueError("error2")] + iterator = self._make_one( + items=items, operation_timeout=expected_timeout, errors=errors + ) + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.DeadlineExceeded) as exc: + await iterator.__anext__() + assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( + exc.value + ) + error_group = exc.value.__cause__ + assert isinstance(error_group, RetryExceptionGroup) + assert len(error_group.exceptions) == 2 + assert error_group.exceptions[0] is errors[0] + assert error_group.exceptions[1] is errors[1] + assert "2 failed attempts" in str(error_group) + + @pytest.mark.asyncio + async def test___anext__with_error(self): + """ + Other errors should be raised as-is + """ + from google.api_core import exceptions as core_exceptions + + items = [1, core_exceptions.InternalServerError("mock error")] + iterator = self._make_one(items=items) + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.InternalServerError) as exc: + await iterator.__anext__() + assert exc.value is items[1] + assert iterator.active is False + # next call should raise same error + with pytest.raises(core_exceptions.InternalServerError) as exc: + await iterator.__anext__() + + @pytest.mark.asyncio + async def test__finish_with_error(self): + iterator = self._make_one() + await iterator._start_idle_timer(10) + timeout_task = iterator._idle_timeout_task + assert await iterator.__anext__() == 0 + assert iterator.active is True + 
err = ZeroDivisionError("mock error") + await iterator._finish_with_error(err) + assert iterator.active is False + assert iterator._merger_or_error is err + assert iterator._idle_timeout_task is None + with pytest.raises(ZeroDivisionError) as exc: + await iterator.__anext__() + assert exc.value is err + # timeout task should be cancelled + with pytest.raises(asyncio.CancelledError): + await timeout_task + + @pytest.mark.asyncio + async def test_aclose(self): + iterator = self._make_one() + await iterator._start_idle_timer(10) + timeout_task = iterator._idle_timeout_task + assert await iterator.__anext__() == 0 + assert iterator.active is True + await iterator.aclose() + assert iterator.active is False + assert isinstance(iterator._merger_or_error, StopAsyncIteration) + assert iterator._idle_timeout_task is None + with pytest.raises(StopAsyncIteration) as e: + await iterator.__anext__() + assert "closed" in str(e.value) + # timeout task should be cancelled + with pytest.raises(asyncio.CancelledError): + await timeout_task From ccd9545d6c43f705db670d68689cd9a63a031c73 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:15:22 -0700 Subject: [PATCH 320/349] added tests for timeouts --- google/cloud/bigtable/client.py | 17 +++++++++-- tests/unit/test_client.py | 32 ++++++++++++++++++++- tests/unit/test_client_read_rows.py | 44 +++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 49d556e5f..df85dc2ba 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -292,6 +292,10 @@ def get_table( instance_id: str, table_id: str, app_profile_id: str | None = None, + *, + default_operation_timeout: float = 60, + default_per_row_timeout: float | None = 10, + default_per_request_timeout: float | None = None, ) -> Table: """ Returns a table instance for making data API requests @@ -304,7 +308,7 @@ def get_table( app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles """ - return Table(self, instance_id, table_id, app_profile_id) + return Table(self, instance_id, table_id, app_profile_id, default_operation_timeout=default_operation_timeout, default_per_row_timeout=default_per_row_timeout, default_per_request_timeout=default_per_request_timeout) async def __aenter__(self): self.start_background_channel_refresh() @@ -355,8 +359,16 @@ def __init__( Raises: - RuntimeError if called outside of an async context (no running event loop) """ + # validate timeouts + if default_operation_timeout <= 0: + raise ValueError("default_operation_timeout must be greater than 0") + if default_per_row_timeout is not None and default_per_row_timeout <= 0: + raise ValueError("default_per_row_timeout must be greater than 0") + if default_per_request_timeout is not None and default_per_request_timeout <= 0: + raise ValueError("default_per_request_timeout must be greater than 0") + if default_per_request_timeout is not None and default_per_request_timeout > default_operation_timeout: + raise ValueError("default_per_request_timeout must be less than default_operation_timeout") self.client = client - self.instance_id = instance_id self.instance_name = self.client._gapic_client.instance_path( self.client.project, instance_id @@ -367,7 +379,6 @@ def __init__( ) self.app_profile_id = app_profile_id - self.app_profile_id = app_profile_id self.default_operation_timeout = default_operation_timeout self.default_per_row_timeout = default_per_row_timeout self.default_per_request_timeout = default_per_request_timeout diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4146ba81d..13735707f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -606,7 +606,7 @@ async def test__multiple_table_registration(): assert instance_1_path not in client._active_instances assert len(client._instance_owners[instance_1_path]) == 0 - +@pytest.mark.asyncio async def test__multiple_instance_registration(): async with _make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: @@ -800,6 +800,9 @@ async def test_table_ctor(): expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" + expected_operation_timeout = 123 + expected_per_row_timeout = 21 + expected_per_request_timeout = 12 client = BigtableDataClient() assert not client._active_instances @@ -808,6 +811,9 @@ async def test_table_ctor(): expected_instance_id, expected_table_id, expected_app_profile_id, + default_operation_timeout=expected_operation_timeout, + default_per_row_timeout=expected_per_row_timeout, + default_per_request_timeout=expected_per_request_timeout, ) await asyncio.sleep(0) assert table.table_id == expected_table_id @@ -815,6 +821,9 @@ async def test_table_ctor(): assert table.app_profile_id == expected_app_profile_id assert table.client is client assert table.instance_name in client._active_instances + assert table.default_operation_timeout == expected_operation_timeout + assert table.default_per_row_timeout == expected_per_row_timeout + assert table.default_per_request_timeout == expected_per_request_timeout # ensure task reaches completion await table._register_instance_task assert table._register_instance_task.done() @@ -822,6 +831,27 @@ async def test_table_ctor(): assert table._register_instance_task.exception() is None await client.close() +@pytest.mark.asyncio +async def test_table_ctor_bad_timeout_values(): + from 
google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + + client = BigtableDataClient() + + with pytest.raises(ValueError) as e: + table = Table(client, "", "", default_per_row_timeout=-1) + assert "default_per_row_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + table = Table(client, "", "", default_per_request_timeout=-1) + assert "default_per_request_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + table = Table(client, "", "", default_operation_timeout=-1) + assert "default_operation_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + table = Table(client, "", "", default_operation_timeout=1, default_per_request_timeout=2) + assert "default_per_request_timeout must be less than default_operation_timeout" in str(e.value) + await client.close() + def test_table_ctor_sync(): # initializing client in a sync context should raise RuntimeError diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index e18aeca9a..b79a192a1 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -459,3 +459,47 @@ async def test_read_rows_revise_request(): second_call_kwargs = revise_rowset.call_args_list[1].kwargs assert second_call_kwargs["row_set"] == "modified" assert second_call_kwargs["last_seen_row_key"] == b"test_1" + +@pytest.mark.asyncio +async def test_read_rows_default_timeouts(): + """ + Ensure that the default timeouts are set on the read rows operation when not overridden + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with _make_client() as client: + async with client.get_table("instance", "table", default_operation_timeout=operation_timeout, default_per_row_timeout=per_row_timeout, default_per_request_timeout=per_request_timeout) as table: + try: + await table.read_rows(ReadRowsQuery()) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout + +@pytest.mark.asyncio +async def test_read_rows_default_timeout_override(): + """ + When timeouts are passed, they overwrite default values + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with _make_client() as client: + async with client.get_table("instance", "table", default_operation_timeout=99, default_per_row_timeout=98, default_per_request_timeout=97) as table: + try: + await table.read_rows(ReadRowsQuery(), operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout From ca84b9698895b4af33bd6dcdfdf8c8157367c728 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:16:06 -0700 
Subject: [PATCH 321/349] ran black --- google/cloud/bigtable/client.py | 19 ++++++++++++++++--- tests/unit/test_client.py | 11 +++++++++-- tests/unit/test_client_read_rows.py | 27 ++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index df85dc2ba..e2f4b1a40 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -308,7 +308,15 @@ def get_table( app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles """ - return Table(self, instance_id, table_id, app_profile_id, default_operation_timeout=default_operation_timeout, default_per_row_timeout=default_per_row_timeout, default_per_request_timeout=default_per_request_timeout) + return Table( + self, + instance_id, + table_id, + app_profile_id, + default_operation_timeout=default_operation_timeout, + default_per_row_timeout=default_per_row_timeout, + default_per_request_timeout=default_per_request_timeout, + ) async def __aenter__(self): self.start_background_channel_refresh() @@ -366,8 +374,13 @@ def __init__( raise ValueError("default_per_row_timeout must be greater than 0") if default_per_request_timeout is not None and default_per_request_timeout <= 0: raise ValueError("default_per_request_timeout must be greater than 0") - if default_per_request_timeout is not None and default_per_request_timeout > default_operation_timeout: - raise ValueError("default_per_request_timeout must be less than default_operation_timeout") + if ( + default_per_request_timeout is not None + and default_per_request_timeout > default_operation_timeout + ): + raise ValueError( + "default_per_request_timeout must be less than default_operation_timeout" + ) self.client = client self.instance_id = instance_id self.instance_name = self.client._gapic_client.instance_path( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 13735707f..205c7d15f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -606,6 +606,7 @@ async def test__multiple_table_registration(): assert instance_1_path not in client._active_instances assert len(client._instance_owners[instance_1_path]) == 0 + @pytest.mark.asyncio async def test__multiple_instance_registration(): async with _make_one(project="project-id") as client: @@ -831,6 +832,7 @@ async def test_table_ctor(): assert table._register_instance_task.exception() is None await client.close() + @pytest.mark.asyncio async def test_table_ctor_bad_timeout_values(): from google.cloud.bigtable.client import BigtableDataClient @@ -848,8 +850,13 @@ async def test_table_ctor_bad_timeout_values(): table = Table(client, "", "", default_operation_timeout=-1) assert "default_operation_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - table = Table(client, "", "", default_operation_timeout=1, default_per_request_timeout=2) - assert "default_per_request_timeout must be less than default_operation_timeout" in str(e.value) + table = Table( + client, "", "", default_operation_timeout=1, default_per_request_timeout=2 + ) + assert ( + "default_per_request_timeout must be less than default_operation_timeout" + in str(e.value) + ) await client.close() diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index b79a192a1..1ec4c93a7 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -460,19 +460,27 @@ async def 
test_read_rows_revise_request(): assert second_call_kwargs["row_set"] == "modified" assert second_call_kwargs["last_seen_row_key"] == b"test_1" + @pytest.mark.asyncio async def test_read_rows_default_timeouts(): """ Ensure that the default timeouts are set on the read rows operation when not overridden """ from google.cloud.bigtable._read_rows import _ReadRowsOperation + operation_timeout = 8 per_row_timeout = 2 per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with _make_client() as client: - async with client.get_table("instance", "table", default_operation_timeout=operation_timeout, default_per_row_timeout=per_row_timeout, default_per_request_timeout=per_request_timeout) as table: + async with client.get_table( + "instance", + "table", + default_operation_timeout=operation_timeout, + default_per_row_timeout=per_row_timeout, + default_per_request_timeout=per_request_timeout, + ) as table: try: await table.read_rows(ReadRowsQuery()) except RuntimeError: @@ -482,21 +490,34 @@ async def test_read_rows_default_timeouts(): assert kwargs["per_row_timeout"] == per_row_timeout assert kwargs["per_request_timeout"] == per_request_timeout + @pytest.mark.asyncio async def test_read_rows_default_timeout_override(): """ When timeouts are passed, they overwrite default values """ from google.cloud.bigtable._read_rows import _ReadRowsOperation + operation_timeout = 8 per_row_timeout = 2 per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with _make_client() as client: - async with client.get_table("instance", "table", default_operation_timeout=99, default_per_row_timeout=98, default_per_request_timeout=97) as table: + async with client.get_table( + "instance", + "table", + default_operation_timeout=99, + default_per_row_timeout=98, + default_per_request_timeout=97, + ) as table: try: - await table.read_rows(ReadRowsQuery(), operation_timeout=operation_timeout, per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout) + await table.read_rows( + ReadRowsQuery(), + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, + ) except RuntimeError: pass kwargs = mock_op.call_args_list[0].kwargs From eb936cfaf0cda11108a881d3bf74449ce804ca73 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:19:23 -0700 Subject: [PATCH 322/349] fixed lint issues --- google/cloud/bigtable/iterators.py | 3 ++- tests/unit/test_client.py | 8 ++++---- tests/unit/test_iterators.py | 2 -- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 086d4e7e3..24241e094 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -133,7 +133,8 @@ async def _finish_with_error(self, e: Exception): after an error has occurred. 
""" if self.active: - await self._merger_or_error.aclose() + merger = cast(_ReadRowsOperation, self._merger_or_error) + await merger.aclose() self._merger_or_error = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 205c7d15f..75ba8e98a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -841,16 +841,16 @@ async def test_table_ctor_bad_timeout_values(): client = BigtableDataClient() with pytest.raises(ValueError) as e: - table = Table(client, "", "", default_per_row_timeout=-1) + Table(client, "", "", default_per_row_timeout=-1) assert "default_per_row_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - table = Table(client, "", "", default_per_request_timeout=-1) + Table(client, "", "", default_per_request_timeout=-1) assert "default_per_request_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - table = Table(client, "", "", default_operation_timeout=-1) + Table(client, "", "", default_operation_timeout=-1) assert "default_operation_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - table = Table( + Table( client, "", "", default_operation_timeout=1, default_per_request_timeout=2 ) assert ( diff --git a/tests/unit/test_iterators.py b/tests/unit/test_iterators.py index 3948857d2..0bd9e8444 100644 --- a/tests/unit/test_iterators.py +++ b/tests/unit/test_iterators.py @@ -22,10 +22,8 @@ # try/except added for compatibility with python < 3.8 try: from unittest import mock - from unittest.mock import AsyncMock # type: ignore except ImportError: # pragma: NO COVER import mock # type: ignore - from mock import AsyncMock # type: ignore class MockStream(_ReadRowsOperation): From ab4313867c172ae318ff2897fe12f74e8b92981d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:25:21 -0700 Subject: [PATCH 323/349] restructured test_client --- tests/unit/test_client.py | 1466 ++++++++++++++++++------------------- 1 file changed, 732 insertions(+), 734 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b1af183a1..c4ea5941c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -34,800 +34,798 @@ ) -def _get_target_class(): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient - - -def _make_one(*args, **kwargs): - return _get_target_class()(*args, **kwargs) +class TestBigtableDataClient(): + def _get_target_class(self): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient + + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + + @pytest.mark.asyncio + async def test_ctor(self): + expected_project = "project-id" + expected_pool_size = 11 + expected_credentials = AnonymousCredentials() + client = self._make_one( + project="project-id", + pool_size=expected_pool_size, + credentials=expected_credentials, + ) + await asyncio.sleep(0.1) + assert client.project == expected_project + assert len(client.transport._grpc_channel._pool) == expected_pool_size + assert not client._active_instances + assert len(client._channel_refresh_tasks) == expected_pool_size + assert client.transport._credentials == expected_credentials + await client.close() -@pytest.mark.asyncio -async def test_ctor(): - expected_project = "project-id" - expected_pool_size = 11 - expected_credentials = AnonymousCredentials() - client = 
_make_one( - project="project-id", - pool_size=expected_pool_size, - credentials=expected_credentials, - ) - await asyncio.sleep(0.1) - assert client.project == expected_project - assert len(client.transport._grpc_channel._pool) == expected_pool_size - assert not client._active_instances - assert len(client._channel_refresh_tasks) == expected_pool_size - assert client.transport._credentials == expected_credentials - await client.close() - - -@pytest.mark.asyncio -async def test_ctor_super_inits(): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.cloud.client import ClientWithProject - from google.api_core import client_options as client_options_lib - - project = "project-id" - pool_size = 11 - credentials = AnonymousCredentials() - client_options = {"api_endpoint": "foo.bar:1234"} - options_parsed = client_options_lib.from_dict(client_options) - transport_str = f"pooled_grpc_asyncio_{pool_size}" - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - bigtable_client_init.return_value = None - with mock.patch.object(ClientWithProject, "__init__") as client_project_init: - client_project_init.return_value = None + @pytest.mark.asyncio + async def test_ctor_super_inits(self): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.cloud.client import ClientWithProject + from google.api_core import client_options as client_options_lib + + project = "project-id" + pool_size = 11 + credentials = AnonymousCredentials() + client_options = {"api_endpoint": "foo.bar:1234"} + options_parsed = client_options_lib.from_dict(client_options) + transport_str = f"pooled_grpc_asyncio_{pool_size}" + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + bigtable_client_init.return_value = None + with mock.patch.object(ClientWithProject, "__init__") as client_project_init: + client_project_init.return_value = None + try: + self._make_one( + project=project, + pool_size=pool_size, + credentials=credentials, + client_options=options_parsed, + ) + except AttributeError: + pass + # test gapic superclass init was called + assert bigtable_client_init.call_count == 1 + kwargs = bigtable_client_init.call_args[1] + assert kwargs["transport"] == transport_str + assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed + # test mixin superclass init was called + assert client_project_init.call_count == 1 + kwargs = client_project_init.call_args[1] + assert kwargs["project"] == project + assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed + + + @pytest.mark.asyncio + async def test_ctor_dict_options(self): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.api_core.client_options import ClientOptions + from google.cloud.bigtable.client import BigtableDataClient + + client_options = {"api_endpoint": "foo.bar:1234"} + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: try: - _make_one( - project=project, - pool_size=pool_size, - credentials=credentials, - client_options=options_parsed, - ) - except AttributeError: + self._make_one(client_options=client_options) + except TypeError: pass - # test gapic superclass init was called - assert bigtable_client_init.call_count == 1 + bigtable_client_init.assert_called_once() kwargs = bigtable_client_init.call_args[1] - assert 
kwargs["transport"] == transport_str - assert kwargs["credentials"] == credentials - assert kwargs["client_options"] == options_parsed - # test mixin superclass init was called - assert client_project_init.call_count == 1 - kwargs = client_project_init.call_args[1] - assert kwargs["project"] == project - assert kwargs["credentials"] == credentials - assert kwargs["client_options"] == options_parsed - - -@pytest.mark.asyncio -async def test_ctor_dict_options(): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.api_core.client_options import ClientOptions - from google.cloud.bigtable.client import BigtableDataClient - - client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - try: - _make_one(client_options=client_options) - except TypeError: - pass - bigtable_client_init.assert_called_once() - kwargs = bigtable_client_init.call_args[1] - called_options = kwargs["client_options"] - assert called_options.api_endpoint == "foo.bar:1234" - assert isinstance(called_options, ClientOptions) - with mock.patch.object( - BigtableDataClient, "start_background_channel_refresh" - ) as start_background_refresh: - client = _make_one(client_options=client_options) - start_background_refresh.assert_called_once() - await client.close() + called_options = kwargs["client_options"] + assert called_options.api_endpoint == "foo.bar:1234" + assert isinstance(called_options, ClientOptions) + with mock.patch.object( + BigtableDataClient, "start_background_channel_refresh" + ) as start_background_refresh: + client = self._make_one(client_options=client_options) + start_background_refresh.assert_called_once() + await client.close() -@pytest.mark.asyncio -async def test_veneer_grpc_headers(): - # client_info should be populated with headers to - # detect as a veneer client - patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") - with patch as gapic_mock: - client = _make_one(project="project-id") - wrapped_call_list = gapic_mock.call_args_list - assert len(wrapped_call_list) > 0 - # each wrapped call should have veneer headers - for call in wrapped_call_list: - client_info = call.kwargs["client_info"] - assert client_info is not None, f"{call} has no client_info" - wrapped_user_agent_sorted = " ".join( - sorted(client_info.to_user_agent().split(" ")) - ) - assert VENEER_HEADER_REGEX.match( - wrapped_user_agent_sorted - ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" - await client.close() + @pytest.mark.asyncio + async def test_veneer_grpc_headers(self): + # client_info should be populated with headers to + # detect as a veneer client + patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") + with patch as gapic_mock: + client = self._make_one(project="project-id") + wrapped_call_list = gapic_mock.call_args_list + assert len(wrapped_call_list) > 0 + # each wrapped call should have veneer headers + for call in wrapped_call_list: + client_info = call.kwargs["client_info"] + assert client_info is not None, f"{call} has no client_info" + wrapped_user_agent_sorted = " ".join( + sorted(client_info.to_user_agent().split(" ")) + ) + assert VENEER_HEADER_REGEX.match( + wrapped_user_agent_sorted + ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" + await client.close() -@pytest.mark.asyncio -async def test_channel_pool_creation(): - pool_size = 14 - with mock.patch( - "google.api_core.grpc_helpers_async.create_channel" - 
) as create_channel: - create_channel.return_value = AsyncMock() - client = _make_one(project="project-id", pool_size=pool_size) - assert create_channel.call_count == pool_size + @pytest.mark.asyncio + async def test_channel_pool_creation(self): + pool_size = 14 + with mock.patch( + "google.api_core.grpc_helpers_async.create_channel" + ) as create_channel: + create_channel.return_value = AsyncMock() + client = self._make_one(project="project-id", pool_size=pool_size) + assert create_channel.call_count == pool_size + await client.close() + # channels should be unique + client = self._make_one(project="project-id", pool_size=pool_size) + pool_list = list(client.transport._grpc_channel._pool) + pool_set = set(client.transport._grpc_channel._pool) + assert len(pool_list) == len(pool_set) await client.close() - # channels should be unique - client = _make_one(project="project-id", pool_size=pool_size) - pool_list = list(client.transport._grpc_channel._pool) - pool_set = set(client.transport._grpc_channel._pool) - assert len(pool_list) == len(pool_set) - await client.close() - - -@pytest.mark.asyncio -async def test_channel_pool_rotation(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledChannel, - ) - pool_size = 7 - with mock.patch.object(PooledChannel, "next_channel") as next_channel: - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client.transport._grpc_channel._pool) == pool_size - next_channel.reset_mock() - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "unary_unary" - ) as unary_unary: - # calling an rpc `pool_size` times should use a different channel each time - channel_next = None - for i in range(pool_size): - channel_last = channel_next - channel_next = client.transport.grpc_channel._pool[i] - assert channel_last != channel_next - next_channel.return_value = channel_next - client.transport.ping_and_warm() - assert next_channel.call_count == i + 1 - unary_unary.assert_called_once() - unary_unary.reset_mock() - await client.close() - - -@pytest.mark.asyncio -async def test_channel_pool_replace(): - with mock.patch.object(asyncio, "sleep"): + @pytest.mark.asyncio + async def test_channel_pool_rotation(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledChannel, + ) + pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport._grpc_channel._pool] - grace_period = 9 + + with mock.patch.object(PooledChannel, "next_channel") as next_channel: + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client.transport._grpc_channel._pool) == pool_size + next_channel.reset_mock() with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel( - replace_idx, grace=grace_period, new_channel=new_channel - ) - close.assert_called_once_with(grace=grace_period) - close.assert_awaited_once() - assert client.transport._grpc_channel._pool[replace_idx] == new_channel - for i in range(pool_size): - if i != replace_idx: - assert client.transport._grpc_channel._pool[i] == start_pool[i] - else: - assert client.transport._grpc_channel._pool[i] != start_pool[i] + type(client.transport._grpc_channel._pool[0]), "unary_unary" + ) as unary_unary: + # calling an rpc `pool_size` times should 
use a different channel each time + channel_next = None + for i in range(pool_size): + channel_last = channel_next + channel_next = client.transport.grpc_channel._pool[i] + assert channel_last != channel_next + next_channel.return_value = channel_next + client.transport.ping_and_warm() + assert next_channel.call_count == i + 1 + unary_unary.assert_called_once() + unary_unary.reset_mock() await client.close() -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -def test_start_background_channel_refresh_sync(): - # should raise RuntimeError if called in a sync context - client = _make_one(project="project-id") - with pytest.raises(RuntimeError): - client.start_background_channel_refresh() + @pytest.mark.asyncio + async def test_channel_pool_replace(self): + with mock.patch.object(asyncio, "sleep"): + pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + for replace_idx in range(pool_size): + start_pool = [channel for channel in client.transport._grpc_channel._pool] + grace_period = 9 + with mock.patch.object( + type(client.transport._grpc_channel._pool[0]), "close" + ) as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) + close.assert_called_once_with(grace=grace_period) + close.assert_awaited_once() + assert client.transport._grpc_channel._pool[replace_idx] == new_channel + for i in range(pool_size): + if i != replace_idx: + assert client.transport._grpc_channel._pool[i] == start_pool[i] + else: + assert client.transport._grpc_channel._pool[i] != start_pool[i] + await client.close() -@pytest.mark.asyncio -async def test_start_background_channel_refresh_tasks_exist(): - # if tasks exist, should do nothing - client = _make_one(project="project-id") - with mock.patch.object(asyncio, "create_task") as create_task: - client.start_background_channel_refresh() - create_task.assert_not_called() - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.parametrize("pool_size", [1, 3, 7]) -async def test_start_background_channel_refresh(pool_size): - # should create background tasks for each channel - client = _make_one(project="project-id", pool_size=pool_size) - ping_and_warm = AsyncMock() - client._ping_and_warm_instances = ping_and_warm - client.start_background_channel_refresh() - assert len(client._channel_refresh_tasks) == pool_size - for task in client._channel_refresh_tasks: - assert isinstance(task, asyncio.Task) - await asyncio.sleep(0.1) - assert ping_and_warm.call_count == pool_size - for channel in client.transport._grpc_channel._pool: - ping_and_warm.assert_any_call(channel) - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.skipif( - sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" -) -async def test_start_background_channel_refresh_tasks_names(): - # if tasks exist, should do nothing - pool_size = 3 - client = _make_one(project="project-id", pool_size=pool_size) - for i in range(pool_size): - name = client._channel_refresh_tasks[i].get_name() - assert str(i) in name - assert "BigtableDataClient channel refresh " in name - await client.close() - - -@pytest.mark.asyncio -async def test__ping_and_warm_instances(): - # test with no instances - with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - client = _make_one(project="project-id", pool_size=1) - channel = client.transport._grpc_channel._pool[0] - await client._ping_and_warm_instances(channel) - gather.assert_called_once() 
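The pool tests above all lean on the same construction-time behaviour: BigtableDataClient opens pool_size gRPC channels and, when created inside a running event loop, starts one background refresh task per channel, all of which are torn down by close(). A small sketch of that life cycle, mirroring what these tests assert; AnonymousCredentials keeps the example offline, and the pool/task attributes inspected here are private details that may change:

import asyncio

from google.auth.credentials import AnonymousCredentials
from google.cloud.bigtable.client import BigtableDataClient

async def main():
    client = BigtableDataClient(
        project="project-id",
        pool_size=4,
        credentials=AnonymousCredentials(),
    )
    # One gRPC channel and one background refresh task per pool slot.
    assert len(client.transport._grpc_channel._pool) == 4
    assert len(client._channel_refresh_tasks) == 4
    # close() cancels the refresh tasks and closes every pooled channel.
    await client.close()

asyncio.run(main())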
- gather.assert_awaited_once() - assert not gather.call_args.args - assert gather.call_args.kwargs == {"return_exceptions": True} - # test with instances - client._active_instances = [ - "instance-1", - "instance-2", - "instance-3", - "instance-4", - ] - with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - assert len(gather.call_args.args) == 4 - assert gather.call_args.kwargs == {"return_exceptions": True} - for idx, call in enumerate(gather.call_args.args): - assert isinstance(call, grpc.aio.UnaryUnaryCall) - call._request["name"] = client._active_instances[idx] - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "refresh_interval, wait_time, expected_sleep", - [ - (0, 0, 0), - (0, 1, 0), - (10, 0, 10), - (10, 5, 5), - (10, 10, 0), - (10, 15, 0), - ], -) -async def test__manage_channel_first_sleep(refresh_interval, wait_time, expected_sleep): - # first sleep time should be `refresh_interval` seconds after client init - import time + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_start_background_channel_refresh_sync(self): + # should raise RuntimeError if called in a sync context + client = self._make_one(project="project-id") + with pytest.raises(RuntimeError): + client.start_background_channel_refresh() - with mock.patch.object(time, "time") as time: - time.return_value = 0 - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = asyncio.CancelledError - try: - client = _make_one(project="project-id") - client._channel_init_time = -wait_time - await client._manage_channel(0, refresh_interval, refresh_interval) - except asyncio.CancelledError: - pass - sleep.assert_called_once() - call_time = sleep.call_args[0][0] - assert ( - abs(call_time - expected_sleep) < 0.1 - ), f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" - await client.close() + @pytest.mark.asyncio + async def test_start_background_channel_refresh_tasks_exist(self): + # if tasks exist, should do nothing + client = self._make_one(project="project-id") + with mock.patch.object(asyncio, "create_task") as create_task: + client.start_background_channel_refresh() + create_task.assert_not_called() + await client.close() -@pytest.mark.asyncio -async def test__manage_channel_ping_and_warm(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, + + @pytest.mark.asyncio + @pytest.mark.parametrize("pool_size", [1, 3, 7]) + async def test_start_background_channel_refresh(self, pool_size): + # should create background tasks for each channel + client = self._make_one(project="project-id", pool_size=pool_size) + ping_and_warm = AsyncMock() + client._ping_and_warm_instances = ping_and_warm + client.start_background_channel_refresh() + assert len(client._channel_refresh_tasks) == pool_size + for task in client._channel_refresh_tasks: + assert isinstance(task, asyncio.Task) + await asyncio.sleep(0.1) + assert ping_and_warm.call_count == pool_size + for channel in client.transport._grpc_channel._pool: + ping_and_warm.assert_any_call(channel) + await client.close() + + + @pytest.mark.asyncio + @pytest.mark.skipif( + sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" ) + async def test_start_background_channel_refresh_tasks_names(self): + # if tasks exist, should do nothing + pool_size = 3 + client = 
self._make_one(project="project-id", pool_size=pool_size) + for i in range(pool_size): + name = client._channel_refresh_tasks[i].get_name() + assert str(i) in name + assert "BigtableDataClient channel refresh " in name + await client.close() - # should ping an warm all new channels, and old channels if sleeping - client = _make_one(project="project-id") - new_channel = grpc.aio.insecure_channel("localhost:8080") - with mock.patch.object(asyncio, "sleep"): - create_channel = mock.Mock() - create_channel.return_value = new_channel - client.transport.grpc_channel._create_channel = create_channel - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - replace_channel.side_effect = asyncio.CancelledError - # should ping and warm old channel then new if sleep > 0 - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: + + @pytest.mark.asyncio + async def test__ping_and_warm_instances(self): + # test with no instances + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + client = self._make_one(project="project-id", pool_size=1) + channel = client.transport._grpc_channel._pool[0] + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert not gather.call_args.args + assert gather.call_args.kwargs == {"return_exceptions": True} + # test with instances + client._active_instances = [ + "instance-1", + "instance-2", + "instance-3", + "instance-4", + ] + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert len(gather.call_args.args) == 4 + assert gather.call_args.kwargs == {"return_exceptions": True} + for idx, call in enumerate(gather.call_args.args): + assert isinstance(call, grpc.aio.UnaryUnaryCall) + call._request["name"] = client._active_instances[idx] + await client.close() + + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "refresh_interval, wait_time, expected_sleep", + [ + (0, 0, 0), + (0, 1, 0), + (10, 0, 10), + (10, 5, 5), + (10, 10, 0), + (10, 15, 0), + ], + ) + async def test__manage_channel_first_sleep(self, refresh_interval, wait_time, expected_sleep): + # first sleep time should be `refresh_interval` seconds after client init + import time + + with mock.patch.object(time, "time") as time: + time.return_value = 0 + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = asyncio.CancelledError try: - channel_idx = 2 - old_channel = client.transport._grpc_channel._pool[channel_idx] - await client._manage_channel(channel_idx, 10) + client = self._make_one(project="project-id") + client._channel_init_time = -wait_time + await client._manage_channel(0, refresh_interval, refresh_interval) except asyncio.CancelledError: pass - assert ping_and_warm.call_count == 2 - assert old_channel != new_channel - called_with = [call[0][0] for call in ping_and_warm.call_args_list] - assert old_channel in called_with - assert new_channel in called_with - # should ping and warm instantly new channel only if not sleeping + sleep.assert_called_once() + call_time = sleep.call_args[0][0] + assert ( + abs(call_time - expected_sleep) < 0.1 + ), f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" + await client.close() + + + @pytest.mark.asyncio + async def test__manage_channel_ping_and_warm(self): + from 
google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + + # should ping an warm all new channels, and old channels if sleeping + client = self._make_one(project="project-id") + new_channel = grpc.aio.insecure_channel("localhost:8080") + with mock.patch.object(asyncio, "sleep"): + create_channel = mock.Mock() + create_channel.return_value = new_channel + client.transport.grpc_channel._create_channel = create_channel with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + replace_channel.side_effect = asyncio.CancelledError + # should ping and warm old channel then new if sleep > 0 + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + channel_idx = 2 + old_channel = client.transport._grpc_channel._pool[channel_idx] + await client._manage_channel(channel_idx, 10) + except asyncio.CancelledError: + pass + assert ping_and_warm.call_count == 2 + assert old_channel != new_channel + called_with = [call[0][0] for call in ping_and_warm.call_args_list] + assert old_channel in called_with + assert new_channel in called_with + # should ping and warm instantly new channel only if not sleeping + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + await client._manage_channel(0, 0, 0) + except asyncio.CancelledError: + pass + ping_and_warm.assert_called_once_with(new_channel) + await client.close() + + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "refresh_interval, num_cycles, expected_sleep", + [ + (None, 1, 60 * 35), + (10, 10, 100), + (10, 1, 10), + ], + ) + async def test__manage_channel_sleeps(self, refresh_interval, num_cycles, expected_sleep): + # make sure that sleeps work as expected + import time + import random + + channel_idx = 1 + random.uniform = mock.Mock() + random.uniform.side_effect = lambda min_, max_: min_ + with mock.patch.object(time, "time") as time: + time.return_value = 0 + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] try: - await client._manage_channel(0, 0, 0) + client = self._make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel( + channel_idx, refresh_interval, refresh_interval + ) + else: + await client._manage_channel(channel_idx) except asyncio.CancelledError: pass - ping_and_warm.assert_called_once_with(new_channel) - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "refresh_interval, num_cycles, expected_sleep", - [ - (None, 1, 60 * 35), - (10, 10, 100), - (10, 1, 10), - ], -) -async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sleep): - # make sure that sleeps work as expected - import time - import random - - channel_idx = 1 - random.uniform = mock.Mock() - random.uniform.side_effect = lambda min_, max_: min_ - with mock.patch.object(time, "time") as time: - time.return_value = 0 - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError - ] - try: - client = _make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel( - channel_idx, refresh_interval, refresh_interval - ) - else: - await client._manage_channel(channel_idx) - except asyncio.CancelledError: 
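Read together, the _manage_channel tests pin down a simple maintenance loop per pool slot: sleep for a jittered interval, build a replacement channel, ping-and-warm it, then swap it into the pool while the old channel is closed with a grace period (transport.replace_channel in the real client). The following is only a schematic reconstruction of that behaviour for readers of these tests, not the library's implementation; channel objects, warm-up, and closing are stubbed with placeholders:

import asyncio
import random

async def refresh_slot(pool, idx, make_channel, warm, interval_min, interval_max, cycles=3):
    # Warm the channel already in the slot, then periodically create a fresh
    # channel, warm it before it serves traffic, and swap it into the pool.
    await warm(pool[idx])
    for _ in range(cycles):
        await asyncio.sleep(random.uniform(interval_min, interval_max))
        new_channel = make_channel()
        await warm(new_channel)
        pool[idx] = new_channel  # the real client closes the old channel with a grace period

async def _demo():
    pool = ["channel-0", "channel-1"]
    warmed = []

    async def warm(channel):
        warmed.append(channel)

    await refresh_slot(pool, 1, lambda: "replacement", warm, 0, 0)
    print(pool)    # ['channel-0', 'replacement']
    print(warmed)  # ['channel-1', 'replacement', 'replacement', 'replacement']

asyncio.run(_demo())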
- pass - assert sleep.call_count == num_cycles - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - assert ( - abs(total_sleep - expected_sleep) < 0.1 - ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - await client.close() + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + await client.close() -@pytest.mark.asyncio -async def test__manage_channel_random(): - import random + @pytest.mark.asyncio + async def test__manage_channel_random(self): + import random - with mock.patch.object(asyncio, "sleep") as sleep: - with mock.patch.object(random, "uniform") as uniform: - uniform.return_value = 0 - try: - uniform.side_effect = asyncio.CancelledError - client = _make_one(project="project-id", pool_size=1) - except asyncio.CancelledError: - uniform.side_effect = None - uniform.reset_mock() - sleep.reset_mock() - min_val = 200 - max_val = 205 - uniform.side_effect = lambda min_, max_: min_ - sleep.side_effect = [None, None, asyncio.CancelledError] - try: - await client._manage_channel(0, min_val, max_val) - except asyncio.CancelledError: - pass - assert uniform.call_count == 2 - uniform_args = [call[0] for call in uniform.call_args_list] - for found_min, found_max in uniform_args: - assert found_min == min_val - assert found_max == max_val - - -@pytest.mark.asyncio -@pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) -async def test__manage_channel_refresh(num_cycles): - # make sure that channels are properly refreshed - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - from google.api_core import grpc_helpers_async - - expected_grace = 9 - expected_refresh = 0.5 - channel_idx = 1 - new_channel = grpc.aio.insecure_channel("localhost:8080") - - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles)] + [ - asyncio.CancelledError - ] - with mock.patch.object( - grpc_helpers_async, "create_channel" - ) as create_channel: - create_channel.return_value = new_channel - client = _make_one(project="project-id") - create_channel.reset_mock() + with mock.patch.object(random, "uniform") as uniform: + uniform.return_value = 0 try: - await client._manage_channel( - channel_idx, - refresh_interval_min=expected_refresh, - refresh_interval_max=expected_refresh, - grace_period=expected_grace, - ) + uniform.side_effect = asyncio.CancelledError + client = self._make_one(project="project-id", pool_size=1) + except asyncio.CancelledError: + uniform.side_effect = None + uniform.reset_mock() + sleep.reset_mock() + min_val = 200 + max_val = 205 + uniform.side_effect = lambda min_, max_: min_ + sleep.side_effect = [None, None, asyncio.CancelledError] + try: + await client._manage_channel(0, min_val, max_val) except asyncio.CancelledError: pass - assert sleep.call_count == num_cycles + 1 - assert create_channel.call_count == num_cycles - assert replace_channel.call_count == num_cycles - for call in replace_channel.call_args_list: - args, kwargs = call - assert args[0] == channel_idx - assert kwargs["grace"] == expected_grace - assert kwargs["new_channel"] == new_channel - await client.close() + assert 
uniform.call_count == 2 + uniform_args = [call[0] for call in uniform.call_args_list] + for found_min, found_max in uniform_args: + assert found_min == min_val + assert found_max == max_val + + + @pytest.mark.asyncio + @pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) + async def test__manage_channel_refresh(self, num_cycles): + # make sure that channels are properly refreshed + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + from google.api_core import grpc_helpers_async + + expected_grace = 9 + expected_refresh = 0.5 + channel_idx = 1 + new_channel = grpc.aio.insecure_channel("localhost:8080") + + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles)] + [ + asyncio.CancelledError + ] + with mock.patch.object( + grpc_helpers_async, "create_channel" + ) as create_channel: + create_channel.return_value = new_channel + client = self._make_one(project="project-id") + create_channel.reset_mock() + try: + await client._manage_channel( + channel_idx, + refresh_interval_min=expected_refresh, + refresh_interval_max=expected_refresh, + grace_period=expected_grace, + ) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + 1 + assert create_channel.call_count == num_cycles + assert replace_channel.call_count == num_cycles + for call in replace_channel.call_args_list: + args, kwargs = call + assert args[0] == channel_idx + assert kwargs["grace"] == expected_grace + assert kwargs["new_channel"] == new_channel + await client.close() + + + @pytest.mark.asyncio + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + async def test__register_instance(self): + # create the client without calling start_background_channel_refresh + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = self._make_one(project="project-id") + assert not client._channel_refresh_tasks + # first call should start background refresh + assert client._active_instances == set() + await client._register_instance("instance-1", mock.Mock()) + assert len(client._active_instances) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-1"} + assert client._channel_refresh_tasks + # next call should not + with mock.patch.object( + type(self._make_one()), "start_background_channel_refresh" + ) as refresh_mock: + await client._register_instance("instance-2", mock.Mock()) + assert len(client._active_instances) == 2 + assert client._active_instances == { + "projects/project-id/instances/instance-1", + "projects/project-id/instances/instance-2", + } + refresh_mock.assert_not_called() + + + @pytest.mark.asyncio + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + async def test__register_instance_ping_and_warm(self): + # should ping and warm each new instance + pool_size = 7 + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = self._make_one(project="project-id", pool_size=pool_size) + # first call should start background refresh + assert not client._channel_refresh_tasks + await client._register_instance("instance-1", mock.Mock()) + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + assert 
not client._active_instances + # next calls should trigger ping and warm + with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_mock: + # new instance should trigger ping and warm + await client._register_instance("instance-2", mock.Mock()) + assert ping_mock.call_count == pool_size + await client._register_instance("instance-3", mock.Mock()) + assert ping_mock.call_count == pool_size * 2 + # duplcate instances should not trigger ping and warm + await client._register_instance("instance-3", mock.Mock()) + assert ping_mock.call_count == pool_size * 2 + await client.close() -@pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test__register_instance(): - # create the client without calling start_background_channel_refresh - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = _make_one(project="project-id") - assert not client._channel_refresh_tasks - # first call should start background refresh - assert client._active_instances == set() - await client._register_instance("instance-1", mock.Mock()) - assert len(client._active_instances) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-1"} - assert client._channel_refresh_tasks - # next call should not - with mock.patch.object( - type(_make_one()), "start_background_channel_refresh" - ) as refresh_mock: - await client._register_instance("instance-2", mock.Mock()) + @pytest.mark.asyncio + async def test__remove_instance_registration(self): + client = self._make_one(project="project-id") + table = mock.Mock() + await client._register_instance("instance-1", table) + await client._register_instance("instance-2", table) assert len(client._active_instances) == 2 - assert client._active_instances == { - "projects/project-id/instances/instance-1", - "projects/project-id/instances/instance-2", - } - refresh_mock.assert_not_called() - - -@pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test__register_instance_ping_and_warm(): - # should ping and warm each new instance - pool_size = 7 - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = _make_one(project="project-id", pool_size=pool_size) - # first call should start background refresh - assert not client._channel_refresh_tasks - await client._register_instance("instance-1", mock.Mock()) - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - assert not client._active_instances - # next calls should trigger ping and warm - with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: - # new instance should trigger ping and warm - await client._register_instance("instance-2", mock.Mock()) - assert ping_mock.call_count == pool_size - await client._register_instance("instance-3", mock.Mock()) - assert ping_mock.call_count == pool_size * 2 - # duplcate instances should not trigger ping and warm - await client._register_instance("instance-3", mock.Mock()) - assert ping_mock.call_count == pool_size * 2 - await client.close() - - -@pytest.mark.asyncio -async def test__remove_instance_registration(): - client = _make_one(project="project-id") - table = mock.Mock() - await client._register_instance("instance-1", table) - await client._register_instance("instance-2", table) - assert len(client._active_instances) 
== 2 - assert len(client._instance_owners.keys()) == 2 - instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") - instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") - assert len(client._instance_owners[instance_1_path]) == 1 - assert list(client._instance_owners[instance_1_path])[0] == id(table) - assert len(client._instance_owners[instance_2_path]) == 1 - assert list(client._instance_owners[instance_2_path])[0] == id(table) - success = await client._remove_instance_registration("instance-1", table) - assert success - assert len(client._active_instances) == 1 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-2"} - success = await client._remove_instance_registration("nonexistant", table) - assert not success - assert len(client._active_instances) == 1 - await client.close() - - -@pytest.mark.asyncio -async def test__multiple_table_registration(): - async with _make_one(project="project-id") as client: - async with client.get_table("instance_1", "table_1") as table_1: - instance_1_path = client._gapic_client.instance_path( - client.project, "instance_1" - ) - assert len(client._instance_owners[instance_1_path]) == 1 - assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - async with client.get_table("instance_1", "table_2") as table_2: - assert len(client._instance_owners[instance_1_path]) == 2 - assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_1_path] - # table_2 should be unregistered, but instance should still be active - assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances - assert id(table_2) not in client._instance_owners[instance_1_path] - # both tables are gone. 
instance should be unregistered - assert len(client._active_instances) == 0 - assert instance_1_path not in client._active_instances + assert len(client._instance_owners.keys()) == 2 + instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") + instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") + assert len(client._instance_owners[instance_1_path]) == 1 + assert list(client._instance_owners[instance_1_path])[0] == id(table) + assert len(client._instance_owners[instance_2_path]) == 1 + assert list(client._instance_owners[instance_2_path])[0] == id(table) + success = await client._remove_instance_registration("instance-1", table) + assert success + assert len(client._active_instances) == 1 assert len(client._instance_owners[instance_1_path]) == 0 + assert len(client._instance_owners[instance_2_path]) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-2"} + success = await client._remove_instance_registration("nonexistant", table) + assert not success + assert len(client._active_instances) == 1 + await client.close() -async def test__multiple_instance_registration(): - async with _make_one(project="project-id") as client: - async with client.get_table("instance_1", "table_1") as table_1: - async with client.get_table("instance_2", "table_2") as table_2: + @pytest.mark.asyncio + async def test__multiple_table_registration(self): + async with self._make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: instance_1_path = client._gapic_client.instance_path( client.project, "instance_1" ) - instance_2_path = client._gapic_client.instance_path( - client.project, "instance_2" - ) assert len(client._instance_owners[instance_1_path]) == 1 - assert len(client._instance_owners[instance_2_path]) == 1 - assert len(client._active_instances) == 2 + assert len(client._active_instances) == 1 + assert id(table_1) in client._instance_owners[instance_1_path] + async with client.get_table("instance_1", "table_2") as table_2: + assert len(client._instance_owners[instance_1_path]) == 2 + assert len(client._active_instances) == 1 + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_1_path] + # table_2 should be unregistered, but instance should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert id(table_2) not in client._instance_owners[instance_1_path] + # both tables are gone. 
instance should be unregistered + assert len(client._active_instances) == 0 + assert instance_1_path not in client._active_instances + assert len(client._instance_owners[instance_1_path]) == 0 + + @pytest.mark.asyncio + async def test__multiple_instance_registration(self): + async with self._make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: + async with client.get_table("instance_2", "table_2") as table_2: + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance_2" + ) + assert len(client._instance_owners[instance_1_path]) == 1 + assert len(client._instance_owners[instance_2_path]) == 1 + assert len(client._active_instances) == 2 + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_2_path] + # instance2 should be unregistered, but instance1 should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert len(client._instance_owners[instance_2_path]) == 0 + assert len(client._instance_owners[instance_1_path]) == 1 assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_2_path] - # instance2 should be unregistered, but instance1 should still be active - assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances + # both tables are gone. instances should both be unregistered + assert len(client._active_instances) == 0 + assert len(client._instance_owners[instance_1_path]) == 0 assert len(client._instance_owners[instance_2_path]) == 0 - assert len(client._instance_owners[instance_1_path]) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - # both tables are gone. 
instances should both be unregistered - assert len(client._active_instances) == 0 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 0 - - -@pytest.mark.asyncio -async def test_get_table(): - from google.cloud.bigtable.client import Table - - client = _make_one(project="project-id") - assert not client._active_instances - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - table = client.get_table( - expected_instance_id, - expected_table_id, - expected_app_profile_id, - ) - await asyncio.sleep(0) - assert isinstance(table, Table) - assert table.table_id == expected_table_id - assert ( - table.table_name - == f"projects/{client.project}/instances/{expected_instance_id}/tables/{expected_table_id}" - ) - assert table.instance_id == expected_instance_id - assert ( - table.instance_name - == f"projects/{client.project}/instances/{expected_instance_id}" - ) - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - await client.close() - - -@pytest.mark.asyncio -async def test_get_table_context_manager(): - from google.cloud.bigtable.client import Table - - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - expected_project_id = "project-id" - - with mock.patch.object(Table, "close") as close_mock: - async with _make_one(project=expected_project_id) as client: - async with client.get_table( - expected_instance_id, - expected_table_id, - expected_app_profile_id, - ) as table: - await asyncio.sleep(0) - assert isinstance(table, Table) - assert table.table_id == expected_table_id - assert ( - table.table_name - == f"projects/{expected_project_id}/instances/{expected_instance_id}/tables/{expected_table_id}" - ) - assert table.instance_id == expected_instance_id - assert ( - table.instance_name - == f"projects/{expected_project_id}/instances/{expected_instance_id}" - ) - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - assert close_mock.call_count == 1 - - -@pytest.mark.asyncio -async def test_multiple_pool_sizes(): - # should be able to create multiple clients with different pool sizes without issue - pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] - for pool_size in pool_sizes: - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - client_duplicate = _make_one(project="project-id", pool_size=pool_size) - assert len(client_duplicate._channel_refresh_tasks) == pool_size - assert str(pool_size) in str(client.transport) + + + @pytest.mark.asyncio + async def test_get_table(self): + from google.cloud.bigtable.client import Table + + client = self._make_one(project="project-id") + assert not client._active_instances + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + table = client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert ( + table.table_name + == f"projects/{client.project}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == expected_instance_id + assert ( + table.instance_name + == 
f"projects/{client.project}/instances/{expected_instance_id}" + ) + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances await client.close() - await client_duplicate.close() -@pytest.mark.asyncio -async def test_close(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) + @pytest.mark.asyncio + async def test_get_table_context_manager(self): + from google.cloud.bigtable.client import Table + + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_project_id = "project-id" + + with mock.patch.object(Table, "close") as close_mock: + async with self._make_one(project=expected_project_id) as client: + async with client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) as table: + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert ( + table.table_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == expected_instance_id + assert ( + table.instance_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}" + ) + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances + assert close_mock.call_count == 1 + + + @pytest.mark.asyncio + async def test_multiple_pool_sizes(self): + # should be able to create multiple clients with different pool sizes without issue + pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] + for pool_size in pool_sizes: + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + client_duplicate = self._make_one(project="project-id", pool_size=pool_size) + assert len(client_duplicate._channel_refresh_tasks) == pool_size + assert str(pool_size) in str(client.transport) + await client.close() + await client_duplicate.close() + - pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - tasks_list = list(client._channel_refresh_tasks) - for task in client._channel_refresh_tasks: - assert not task.done() - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock() - ) as close_mock: + @pytest.mark.asyncio + async def test_close(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + + pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + tasks_list = list(client._channel_refresh_tasks) + for task in client._channel_refresh_tasks: + assert not task.done() + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock() + ) as close_mock: + await client.close() + close_mock.assert_called_once() + close_mock.assert_awaited() + for task in tasks_list: + assert task.done() + assert task.cancelled() + assert client._channel_refresh_tasks == [] + + + @pytest.mark.asyncio + async def test_close_with_timeout(self): + pool_size = 7 + expected_timeout = 19 + client = self._make_one(project="project-id", pool_size=pool_size) + tasks = list(client._channel_refresh_tasks) + with mock.patch.object(asyncio, "wait_for", 
AsyncMock()) as wait_for_mock: + await client.close(timeout=expected_timeout) + wait_for_mock.assert_called_once() + wait_for_mock.assert_awaited() + assert wait_for_mock.call_args[1]["timeout"] == expected_timeout + client._channel_refresh_tasks = tasks await client.close() + + + @pytest.mark.asyncio + async def test_context_manager(self): + # context manager should close the client cleanly + close_mock = AsyncMock() + true_close = None + async with self._make_one(project="project-id") as client: + true_close = client.close() + client.close = close_mock + for task in client._channel_refresh_tasks: + assert not task.done() + assert client.project == "project-id" + assert client._active_instances == set() + close_mock.assert_not_called() close_mock.assert_called_once() close_mock.assert_awaited() - for task in tasks_list: - assert task.done() - assert task.cancelled() - assert client._channel_refresh_tasks == [] - - -@pytest.mark.asyncio -async def test_close_with_timeout(): - pool_size = 7 - expected_timeout = 19 - client = _make_one(project="project-id", pool_size=pool_size) - tasks = list(client._channel_refresh_tasks) - with mock.patch.object(asyncio, "wait_for", AsyncMock()) as wait_for_mock: - await client.close(timeout=expected_timeout) - wait_for_mock.assert_called_once() - wait_for_mock.assert_awaited() - assert wait_for_mock.call_args[1]["timeout"] == expected_timeout - client._channel_refresh_tasks = tasks - await client.close() - - -@pytest.mark.asyncio -async def test_context_manager(): - # context manager should close the client cleanly - close_mock = AsyncMock() - true_close = None - async with _make_one(project="project-id") as client: - true_close = client.close() - client.close = close_mock - for task in client._channel_refresh_tasks: - assert not task.done() - assert client.project == "project-id" - assert client._active_instances == set() - close_mock.assert_not_called() - close_mock.assert_called_once() - close_mock.assert_awaited() - # actually close the client - await true_close - - -def test_client_ctor_sync(): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import BigtableDataClient - - with pytest.warns(RuntimeWarning) as warnings: - client = BigtableDataClient(project="project-id") - expected_warning = [w for w in warnings if "client.py" in w.filename] - assert len(expected_warning) == 1 - assert "BigtableDataClient should be started in an asyncio event loop." in str( - expected_warning[0].message - ) - assert client.project == "project-id" - assert client._channel_refresh_tasks == [] + # actually close the client + await true_close -###################################################################### -# Table Tests -###################################################################### + def test_client_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import BigtableDataClient + with pytest.warns(RuntimeWarning) as warnings: + client = BigtableDataClient(project="project-id") + expected_warning = [w for w in warnings if "client.py" in w.filename] + assert len(expected_warning) == 1 + assert "BigtableDataClient should be started in an asyncio event loop." 
in str( + expected_warning[0].message + ) + assert client.project == "project-id" + assert client._channel_refresh_tasks == [] + + +class TestTable(): + + @pytest.mark.asyncio + async def test_table_ctor(self): + from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + client = BigtableDataClient() + assert not client._active_instances + + table = Table( + client, + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) + await asyncio.sleep(0) + assert table.table_id == expected_table_id + assert table.instance_id == expected_instance_id + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances + # ensure task reaches completion + await table._register_instance_task + assert table._register_instance_task.done() + assert not table._register_instance_task.cancelled() + assert table._register_instance_task.exception() is None + await client.close() -@pytest.mark.asyncio -async def test_table_ctor(): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - client = BigtableDataClient() - assert not client._active_instances + def test_table_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import Table - table = Table( - client, - expected_instance_id, - expected_table_id, - expected_app_profile_id, - ) - await asyncio.sleep(0) - assert table.table_id == expected_table_id - assert table.instance_id == expected_instance_id - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - # ensure task reaches completion - await table._register_instance_task - assert table._register_instance_task.done() - assert not table._register_instance_task.cancelled() - assert table._register_instance_task.exception() is None - await client.close() - - -def test_table_ctor_sync(): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import Table - - client = mock.Mock() - with pytest.raises(RuntimeError) as e: - Table(client, "instance-id", "table-id") - assert e.match("Table must be created within an async event loop context.") + client = mock.Mock() + with pytest.raises(RuntimeError) as e: + Table(client, "instance-id", "table-id") + assert e.match("Table must be created within an async event loop context.") From cb1884d54a555bc7428e0cb7aada8522a108aa1a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:26:37 -0700 Subject: [PATCH 324/349] changed how random is mocked --- tests/unit/test_client.py | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c4ea5941c..1b4c79c27 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -401,29 +401,29 @@ async def test__manage_channel_sleeps(self, refresh_interval, num_cycles, expect import random channel_idx = 1 - random.uniform = mock.Mock() - random.uniform.side_effect = lambda min_, max_: min_ - with mock.patch.object(time, "time") as time: - time.return_value = 0 - 
with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError - ] - try: - client = self._make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel( - channel_idx, refresh_interval, refresh_interval - ) - else: - await client._manage_channel(channel_idx) - except asyncio.CancelledError: - pass - assert sleep.call_count == num_cycles - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) - assert ( - abs(total_sleep - expected_sleep) < 0.1 - ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + with mock.patch.object(random, "uniform") as uniform: + uniform.side_effect = lambda min_, max_: min_ + with mock.patch.object(time, "time") as time: + time.return_value = 0 + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] + try: + client = self._make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel( + channel_idx, refresh_interval, refresh_interval + ) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" await client.close() From 9a89d7438dc8be60e15fa4dc5720f804f390de62 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:27:06 -0700 Subject: [PATCH 325/349] ran black --- tests/unit/test_client.py | 63 ++++++++++++++------------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1b4c79c27..ca7220800 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -34,17 +34,15 @@ ) -class TestBigtableDataClient(): +class TestBigtableDataClient: def _get_target_class(self): from google.cloud.bigtable.client import BigtableDataClient return BigtableDataClient - def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) - @pytest.mark.asyncio async def test_ctor(self): expected_project = "project-id" @@ -63,7 +61,6 @@ async def test_ctor(self): assert client.transport._credentials == expected_credentials await client.close() - @pytest.mark.asyncio async def test_ctor_super_inits(self): from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -80,7 +77,9 @@ async def test_ctor_super_inits(self): transport_str = f"pooled_grpc_asyncio_{pool_size}" with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: bigtable_client_init.return_value = None - with mock.patch.object(ClientWithProject, "__init__") as client_project_init: + with mock.patch.object( + ClientWithProject, "__init__" + ) as client_project_init: client_project_init.return_value = None try: self._make_one( @@ -104,7 +103,6 @@ async def test_ctor_super_inits(self): assert kwargs["credentials"] == credentials assert kwargs["client_options"] == options_parsed - @pytest.mark.asyncio async def test_ctor_dict_options(self): from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -131,7 +129,6 @@ async def test_ctor_dict_options(self): start_background_refresh.assert_called_once() await client.close() - @pytest.mark.asyncio async def 
test_veneer_grpc_headers(self): # client_info should be populated with headers to @@ -153,7 +150,6 @@ async def test_veneer_grpc_headers(self): ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" await client.close() - @pytest.mark.asyncio async def test_channel_pool_creation(self): pool_size = 14 @@ -171,7 +167,6 @@ async def test_channel_pool_creation(self): assert len(pool_list) == len(pool_set) await client.close() - @pytest.mark.asyncio async def test_channel_pool_rotation(self): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -200,14 +195,15 @@ async def test_channel_pool_rotation(self): unary_unary.reset_mock() await client.close() - @pytest.mark.asyncio async def test_channel_pool_replace(self): with mock.patch.object(asyncio, "sleep"): pool_size = 7 client = self._make_one(project="project-id", pool_size=pool_size) for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport._grpc_channel._pool] + start_pool = [ + channel for channel in client.transport._grpc_channel._pool + ] grace_period = 9 with mock.patch.object( type(client.transport._grpc_channel._pool[0]), "close" @@ -226,7 +222,6 @@ async def test_channel_pool_replace(self): assert client.transport._grpc_channel._pool[i] != start_pool[i] await client.close() - @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_start_background_channel_refresh_sync(self): # should raise RuntimeError if called in a sync context @@ -234,7 +229,6 @@ def test_start_background_channel_refresh_sync(self): with pytest.raises(RuntimeError): client.start_background_channel_refresh() - @pytest.mark.asyncio async def test_start_background_channel_refresh_tasks_exist(self): # if tasks exist, should do nothing @@ -244,7 +238,6 @@ async def test_start_background_channel_refresh_tasks_exist(self): create_task.assert_not_called() await client.close() - @pytest.mark.asyncio @pytest.mark.parametrize("pool_size", [1, 3, 7]) async def test_start_background_channel_refresh(self, pool_size): @@ -262,7 +255,6 @@ async def test_start_background_channel_refresh(self, pool_size): ping_and_warm.assert_any_call(channel) await client.close() - @pytest.mark.asyncio @pytest.mark.skipif( sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" @@ -277,7 +269,6 @@ async def test_start_background_channel_refresh_tasks_names(self): assert "BigtableDataClient channel refresh " in name await client.close() - @pytest.mark.asyncio async def test__ping_and_warm_instances(self): # test with no instances @@ -307,7 +298,6 @@ async def test__ping_and_warm_instances(self): call._request["name"] = client._active_instances[idx] await client.close() - @pytest.mark.asyncio @pytest.mark.parametrize( "refresh_interval, wait_time, expected_sleep", @@ -320,7 +310,9 @@ async def test__ping_and_warm_instances(self): (10, 15, 0), ], ) - async def test__manage_channel_first_sleep(self, refresh_interval, wait_time, expected_sleep): + async def test__manage_channel_first_sleep( + self, refresh_interval, wait_time, expected_sleep + ): # first sleep time should be `refresh_interval` seconds after client init import time @@ -341,7 +333,6 @@ async def test__manage_channel_first_sleep(self, refresh_interval, wait_time, ex ), f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" await client.close() - @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(self): from 
google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -385,7 +376,6 @@ async def test__manage_channel_ping_and_warm(self): ping_and_warm.assert_called_once_with(new_channel) await client.close() - @pytest.mark.asyncio @pytest.mark.parametrize( "refresh_interval, num_cycles, expected_sleep", @@ -395,7 +385,9 @@ async def test__manage_channel_ping_and_warm(self): (10, 1, 10), ], ) - async def test__manage_channel_sleeps(self, refresh_interval, num_cycles, expected_sleep): + async def test__manage_channel_sleeps( + self, refresh_interval, num_cycles, expected_sleep + ): # make sure that sleeps work as expected import time import random @@ -426,7 +418,6 @@ async def test__manage_channel_sleeps(self, refresh_interval, num_cycles, expect ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" await client.close() - @pytest.mark.asyncio async def test__manage_channel_random(self): import random @@ -455,7 +446,6 @@ async def test__manage_channel_random(self): assert found_min == min_val assert found_max == max_val - @pytest.mark.asyncio @pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) async def test__manage_channel_refresh(self, num_cycles): @@ -502,7 +492,6 @@ async def test__manage_channel_refresh(self, num_cycles): assert kwargs["new_channel"] == new_channel await client.close() - @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test__register_instance(self): @@ -529,7 +518,6 @@ async def test__register_instance(self): } refresh_mock.assert_not_called() - @pytest.mark.asyncio @pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test__register_instance_ping_and_warm(self): @@ -545,7 +533,9 @@ async def test__register_instance_ping_and_warm(self): assert len(client._channel_refresh_tasks) == pool_size assert not client._active_instances # next calls should trigger ping and warm - with mock.patch.object(type(self._make_one()), "_ping_and_warm_instances") as ping_mock: + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_mock: # new instance should trigger ping and warm await client._register_instance("instance-2", mock.Mock()) assert ping_mock.call_count == pool_size @@ -556,7 +546,6 @@ async def test__register_instance_ping_and_warm(self): assert ping_mock.call_count == pool_size * 2 await client.close() - @pytest.mark.asyncio async def test__remove_instance_registration(self): client = self._make_one(project="project-id") @@ -565,8 +554,12 @@ async def test__remove_instance_registration(self): await client._register_instance("instance-2", table) assert len(client._active_instances) == 2 assert len(client._instance_owners.keys()) == 2 - instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") - instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") + instance_1_path = client._gapic_client.instance_path( + client.project, "instance-1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance-2" + ) assert len(client._instance_owners[instance_1_path]) == 1 assert list(client._instance_owners[instance_1_path])[0] == id(table) assert len(client._instance_owners[instance_2_path]) == 1 @@ -582,7 +575,6 @@ async def test__remove_instance_registration(self): assert len(client._active_instances) == 1 await client.close() - @pytest.mark.asyncio async def test__multiple_table_registration(self): async with self._make_one(project="project-id") as client: @@ 
-634,7 +626,6 @@ async def test__multiple_instance_registration(self): assert len(client._instance_owners[instance_1_path]) == 0 assert len(client._instance_owners[instance_2_path]) == 0 - @pytest.mark.asyncio async def test_get_table(self): from google.cloud.bigtable.client import Table @@ -666,7 +657,6 @@ async def test_get_table(self): assert table.instance_name in client._active_instances await client.close() - @pytest.mark.asyncio async def test_get_table_context_manager(self): from google.cloud.bigtable.client import Table @@ -700,7 +690,6 @@ async def test_get_table_context_manager(self): assert table.instance_name in client._active_instances assert close_mock.call_count == 1 - @pytest.mark.asyncio async def test_multiple_pool_sizes(self): # should be able to create multiple clients with different pool sizes without issue @@ -714,7 +703,6 @@ async def test_multiple_pool_sizes(self): await client.close() await client_duplicate.close() - @pytest.mark.asyncio async def test_close(self): from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( @@ -738,7 +726,6 @@ async def test_close(self): assert task.cancelled() assert client._channel_refresh_tasks == [] - @pytest.mark.asyncio async def test_close_with_timeout(self): pool_size = 7 @@ -753,7 +740,6 @@ async def test_close_with_timeout(self): client._channel_refresh_tasks = tasks await client.close() - @pytest.mark.asyncio async def test_context_manager(self): # context manager should close the client cleanly @@ -772,7 +758,6 @@ async def test_context_manager(self): # actually close the client await true_close - def test_client_ctor_sync(self): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import BigtableDataClient @@ -788,8 +773,7 @@ def test_client_ctor_sync(self): assert client._channel_refresh_tasks == [] -class TestTable(): - +class TestTable: @pytest.mark.asyncio async def test_table_ctor(self): from google.cloud.bigtable.client import BigtableDataClient @@ -820,7 +804,6 @@ async def test_table_ctor(self): assert table._register_instance_task.exception() is None await client.close() - def test_table_ctor_sync(self): # initializing client in a sync context should raise RuntimeError from google.cloud.bigtable.client import Table From 7f783fcabf8c394d6c9296a3052769f1870b732e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:33:09 -0700 Subject: [PATCH 326/349] restructred test_client --- tests/unit/test_client.py | 1527 ++++++++++++++++++------------------- 1 file changed, 755 insertions(+), 772 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 75ba8e98a..b65673e04 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -34,837 +34,820 @@ ) -def _get_target_class(): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient - - -def _make_one(*args, **kwargs): - return _get_target_class()(*args, **kwargs) +class TestBigtableDataClient: + def _get_target_class(self): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.asyncio + async def test_ctor(self): + expected_project = "project-id" + expected_pool_size = 11 + expected_credentials = AnonymousCredentials() + client = self._make_one( + project="project-id", + pool_size=expected_pool_size, + credentials=expected_credentials, + ) + await 
asyncio.sleep(0.1) + assert client.project == expected_project + assert len(client.transport._grpc_channel._pool) == expected_pool_size + assert not client._active_instances + assert len(client._channel_refresh_tasks) == expected_pool_size + assert client.transport._credentials == expected_credentials + await client.close() + @pytest.mark.asyncio + async def test_ctor_super_inits(self): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.cloud.client import ClientWithProject + from google.api_core import client_options as client_options_lib + + project = "project-id" + pool_size = 11 + credentials = AnonymousCredentials() + client_options = {"api_endpoint": "foo.bar:1234"} + options_parsed = client_options_lib.from_dict(client_options) + transport_str = f"pooled_grpc_asyncio_{pool_size}" + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + bigtable_client_init.return_value = None + with mock.patch.object( + ClientWithProject, "__init__" + ) as client_project_init: + client_project_init.return_value = None + try: + self._make_one( + project=project, + pool_size=pool_size, + credentials=credentials, + client_options=options_parsed, + ) + except AttributeError: + pass + # test gapic superclass init was called + assert bigtable_client_init.call_count == 1 + kwargs = bigtable_client_init.call_args[1] + assert kwargs["transport"] == transport_str + assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed + # test mixin superclass init was called + assert client_project_init.call_count == 1 + kwargs = client_project_init.call_args[1] + assert kwargs["project"] == project + assert kwargs["credentials"] == credentials + assert kwargs["client_options"] == options_parsed + + @pytest.mark.asyncio + async def test_ctor_dict_options(self): + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.api_core.client_options import ClientOptions + from google.cloud.bigtable.client import BigtableDataClient -@pytest.mark.asyncio -async def test_ctor(): - expected_project = "project-id" - expected_pool_size = 11 - expected_credentials = AnonymousCredentials() - client = _make_one( - project="project-id", - pool_size=expected_pool_size, - credentials=expected_credentials, - ) - await asyncio.sleep(0.1) - assert client.project == expected_project - assert len(client.transport._grpc_channel._pool) == expected_pool_size - assert not client._active_instances - assert len(client._channel_refresh_tasks) == expected_pool_size - assert client.transport._credentials == expected_credentials - await client.close() - - -@pytest.mark.asyncio -async def test_ctor_super_inits(): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.cloud.client import ClientWithProject - from google.api_core import client_options as client_options_lib - - project = "project-id" - pool_size = 11 - credentials = AnonymousCredentials() - client_options = {"api_endpoint": "foo.bar:1234"} - options_parsed = client_options_lib.from_dict(client_options) - transport_str = f"pooled_grpc_asyncio_{pool_size}" - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - bigtable_client_init.return_value = None - with mock.patch.object(ClientWithProject, "__init__") as client_project_init: - client_project_init.return_value = None + client_options = {"api_endpoint": "foo.bar:1234"} + with 
mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: try: - _make_one( - project=project, - pool_size=pool_size, - credentials=credentials, - client_options=options_parsed, - ) - except AttributeError: + self._make_one(client_options=client_options) + except TypeError: pass - # test gapic superclass init was called - assert bigtable_client_init.call_count == 1 + bigtable_client_init.assert_called_once() kwargs = bigtable_client_init.call_args[1] - assert kwargs["transport"] == transport_str - assert kwargs["credentials"] == credentials - assert kwargs["client_options"] == options_parsed - # test mixin superclass init was called - assert client_project_init.call_count == 1 - kwargs = client_project_init.call_args[1] - assert kwargs["project"] == project - assert kwargs["credentials"] == credentials - assert kwargs["client_options"] == options_parsed - - -@pytest.mark.asyncio -async def test_ctor_dict_options(): - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.api_core.client_options import ClientOptions - from google.cloud.bigtable.client import BigtableDataClient - - client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: - try: - _make_one(client_options=client_options) - except TypeError: - pass - bigtable_client_init.assert_called_once() - kwargs = bigtable_client_init.call_args[1] - called_options = kwargs["client_options"] - assert called_options.api_endpoint == "foo.bar:1234" - assert isinstance(called_options, ClientOptions) - with mock.patch.object( - BigtableDataClient, "start_background_channel_refresh" - ) as start_background_refresh: - client = _make_one(client_options=client_options) - start_background_refresh.assert_called_once() - await client.close() - - -@pytest.mark.asyncio -async def test_veneer_grpc_headers(): - # client_info should be populated with headers to - # detect as a veneer client - patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") - with patch as gapic_mock: - client = _make_one(project="project-id") - wrapped_call_list = gapic_mock.call_args_list - assert len(wrapped_call_list) > 0 - # each wrapped call should have veneer headers - for call in wrapped_call_list: - client_info = call.kwargs["client_info"] - assert client_info is not None, f"{call} has no client_info" - wrapped_user_agent_sorted = " ".join( - sorted(client_info.to_user_agent().split(" ")) - ) - assert VENEER_HEADER_REGEX.match( - wrapped_user_agent_sorted - ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" - await client.close() + called_options = kwargs["client_options"] + assert called_options.api_endpoint == "foo.bar:1234" + assert isinstance(called_options, ClientOptions) + with mock.patch.object( + BigtableDataClient, "start_background_channel_refresh" + ) as start_background_refresh: + client = self._make_one(client_options=client_options) + start_background_refresh.assert_called_once() + await client.close() + @pytest.mark.asyncio + async def test_veneer_grpc_headers(self): + # client_info should be populated with headers to + # detect as a veneer client + patch = mock.patch("google.api_core.gapic_v1.method.wrap_method") + with patch as gapic_mock: + client = self._make_one(project="project-id") + wrapped_call_list = gapic_mock.call_args_list + assert len(wrapped_call_list) > 0 + # each wrapped call should have veneer headers + for call in wrapped_call_list: + client_info = 
call.kwargs["client_info"] + assert client_info is not None, f"{call} has no client_info" + wrapped_user_agent_sorted = " ".join( + sorted(client_info.to_user_agent().split(" ")) + ) + assert VENEER_HEADER_REGEX.match( + wrapped_user_agent_sorted + ), f"'{wrapped_user_agent_sorted}' does not match {VENEER_HEADER_REGEX}" + await client.close() -@pytest.mark.asyncio -async def test_channel_pool_creation(): - pool_size = 14 - with mock.patch( - "google.api_core.grpc_helpers_async.create_channel" - ) as create_channel: - create_channel.return_value = AsyncMock() - client = _make_one(project="project-id", pool_size=pool_size) - assert create_channel.call_count == pool_size + @pytest.mark.asyncio + async def test_channel_pool_creation(self): + pool_size = 14 + with mock.patch( + "google.api_core.grpc_helpers_async.create_channel" + ) as create_channel: + create_channel.return_value = AsyncMock() + client = self._make_one(project="project-id", pool_size=pool_size) + assert create_channel.call_count == pool_size + await client.close() + # channels should be unique + client = self._make_one(project="project-id", pool_size=pool_size) + pool_list = list(client.transport._grpc_channel._pool) + pool_set = set(client.transport._grpc_channel._pool) + assert len(pool_list) == len(pool_set) await client.close() - # channels should be unique - client = _make_one(project="project-id", pool_size=pool_size) - pool_list = list(client.transport._grpc_channel._pool) - pool_set = set(client.transport._grpc_channel._pool) - assert len(pool_list) == len(pool_set) - await client.close() - - -@pytest.mark.asyncio -async def test_channel_pool_rotation(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledChannel, - ) - pool_size = 7 + @pytest.mark.asyncio + async def test_channel_pool_rotation(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledChannel, + ) - with mock.patch.object(PooledChannel, "next_channel") as next_channel: - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client.transport._grpc_channel._pool) == pool_size - next_channel.reset_mock() - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "unary_unary" - ) as unary_unary: - # calling an rpc `pool_size` times should use a different channel each time - channel_next = None - for i in range(pool_size): - channel_last = channel_next - channel_next = client.transport.grpc_channel._pool[i] - assert channel_last != channel_next - next_channel.return_value = channel_next - client.transport.ping_and_warm() - assert next_channel.call_count == i + 1 - unary_unary.assert_called_once() - unary_unary.reset_mock() - await client.close() - - -@pytest.mark.asyncio -async def test_channel_pool_replace(): - with mock.patch.object(asyncio, "sleep"): pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - for replace_idx in range(pool_size): - start_pool = [channel for channel in client.transport._grpc_channel._pool] - grace_period = 9 + + with mock.patch.object(PooledChannel, "next_channel") as next_channel: + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client.transport._grpc_channel._pool) == pool_size + next_channel.reset_mock() with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel( - replace_idx, grace=grace_period, 
new_channel=new_channel - ) - close.assert_called_once_with(grace=grace_period) - close.assert_awaited_once() - assert client.transport._grpc_channel._pool[replace_idx] == new_channel - for i in range(pool_size): - if i != replace_idx: - assert client.transport._grpc_channel._pool[i] == start_pool[i] - else: - assert client.transport._grpc_channel._pool[i] != start_pool[i] + type(client.transport._grpc_channel._pool[0]), "unary_unary" + ) as unary_unary: + # calling an rpc `pool_size` times should use a different channel each time + channel_next = None + for i in range(pool_size): + channel_last = channel_next + channel_next = client.transport.grpc_channel._pool[i] + assert channel_last != channel_next + next_channel.return_value = channel_next + client.transport.ping_and_warm() + assert next_channel.call_count == i + 1 + unary_unary.assert_called_once() + unary_unary.reset_mock() await client.close() + @pytest.mark.asyncio + async def test_channel_pool_replace(self): + with mock.patch.object(asyncio, "sleep"): + pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + for replace_idx in range(pool_size): + start_pool = [ + channel for channel in client.transport._grpc_channel._pool + ] + grace_period = 9 + with mock.patch.object( + type(client.transport._grpc_channel._pool[0]), "close" + ) as close: + new_channel = grpc.aio.insecure_channel("localhost:8080") + await client.transport.replace_channel( + replace_idx, grace=grace_period, new_channel=new_channel + ) + close.assert_called_once_with(grace=grace_period) + close.assert_awaited_once() + assert client.transport._grpc_channel._pool[replace_idx] == new_channel + for i in range(pool_size): + if i != replace_idx: + assert client.transport._grpc_channel._pool[i] == start_pool[i] + else: + assert client.transport._grpc_channel._pool[i] != start_pool[i] + await client.close() -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -def test_start_background_channel_refresh_sync(): - # should raise RuntimeError if called in a sync context - client = _make_one(project="project-id") - with pytest.raises(RuntimeError): - client.start_background_channel_refresh() - + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_start_background_channel_refresh_sync(self): + # should raise RuntimeError if called in a sync context + client = self._make_one(project="project-id") + with pytest.raises(RuntimeError): + client.start_background_channel_refresh() + + @pytest.mark.asyncio + async def test_start_background_channel_refresh_tasks_exist(self): + # if tasks exist, should do nothing + client = self._make_one(project="project-id") + with mock.patch.object(asyncio, "create_task") as create_task: + client.start_background_channel_refresh() + create_task.assert_not_called() + await client.close() -@pytest.mark.asyncio -async def test_start_background_channel_refresh_tasks_exist(): - # if tasks exist, should do nothing - client = _make_one(project="project-id") - with mock.patch.object(asyncio, "create_task") as create_task: + @pytest.mark.asyncio + @pytest.mark.parametrize("pool_size", [1, 3, 7]) + async def test_start_background_channel_refresh(self, pool_size): + # should create background tasks for each channel + client = self._make_one(project="project-id", pool_size=pool_size) + ping_and_warm = AsyncMock() + client._ping_and_warm_instances = ping_and_warm client.start_background_channel_refresh() - create_task.assert_not_called() - await client.close() - - -@pytest.mark.asyncio 
-@pytest.mark.parametrize("pool_size", [1, 3, 7]) -async def test_start_background_channel_refresh(pool_size): - # should create background tasks for each channel - client = _make_one(project="project-id", pool_size=pool_size) - ping_and_warm = AsyncMock() - client._ping_and_warm_instances = ping_and_warm - client.start_background_channel_refresh() - assert len(client._channel_refresh_tasks) == pool_size - for task in client._channel_refresh_tasks: - assert isinstance(task, asyncio.Task) - await asyncio.sleep(0.1) - assert ping_and_warm.call_count == pool_size - for channel in client.transport._grpc_channel._pool: - ping_and_warm.assert_any_call(channel) - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.skipif( - sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" -) -async def test_start_background_channel_refresh_tasks_names(): - # if tasks exist, should do nothing - pool_size = 3 - client = _make_one(project="project-id", pool_size=pool_size) - for i in range(pool_size): - name = client._channel_refresh_tasks[i].get_name() - assert str(i) in name - assert "BigtableDataClient channel refresh " in name - await client.close() - - -@pytest.mark.asyncio -async def test__ping_and_warm_instances(): - # test with no instances - with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - client = _make_one(project="project-id", pool_size=1) - channel = client.transport._grpc_channel._pool[0] - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - assert not gather.call_args.args - assert gather.call_args.kwargs == {"return_exceptions": True} - # test with instances - client._active_instances = [ - "instance-1", - "instance-2", - "instance-3", - "instance-4", - ] - with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - await client._ping_and_warm_instances(channel) - gather.assert_called_once() - gather.assert_awaited_once() - assert len(gather.call_args.args) == 4 - assert gather.call_args.kwargs == {"return_exceptions": True} - for idx, call in enumerate(gather.call_args.args): - assert isinstance(call, grpc.aio.UnaryUnaryCall) - call._request["name"] = client._active_instances[idx] - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "refresh_interval, wait_time, expected_sleep", - [ - (0, 0, 0), - (0, 1, 0), - (10, 0, 10), - (10, 5, 5), - (10, 10, 0), - (10, 15, 0), - ], -) -async def test__manage_channel_first_sleep(refresh_interval, wait_time, expected_sleep): - # first sleep time should be `refresh_interval` seconds after client init - import time + assert len(client._channel_refresh_tasks) == pool_size + for task in client._channel_refresh_tasks: + assert isinstance(task, asyncio.Task) + await asyncio.sleep(0.1) + assert ping_and_warm.call_count == pool_size + for channel in client.transport._grpc_channel._pool: + ping_and_warm.assert_any_call(channel) + await client.close() - with mock.patch.object(time, "time") as time: - time.return_value = 0 - with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = asyncio.CancelledError - try: - client = _make_one(project="project-id") - client._channel_init_time = -wait_time - await client._manage_channel(0, refresh_interval, refresh_interval) - except asyncio.CancelledError: - pass - sleep.assert_called_once() - call_time = sleep.call_args[0][0] - assert ( - abs(call_time - expected_sleep) < 0.1 - ), f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" 
- await client.close() + @pytest.mark.asyncio + @pytest.mark.skipif( + sys.version_info < (3, 8), reason="Task.name requires python3.8 or higher" + ) + async def test_start_background_channel_refresh_tasks_names(self): + # if tasks exist, should do nothing + pool_size = 3 + client = self._make_one(project="project-id", pool_size=pool_size) + for i in range(pool_size): + name = client._channel_refresh_tasks[i].get_name() + assert str(i) in name + assert "BigtableDataClient channel refresh " in name + await client.close() + @pytest.mark.asyncio + async def test__ping_and_warm_instances(self): + # test with no instances + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + client = self._make_one(project="project-id", pool_size=1) + channel = client.transport._grpc_channel._pool[0] + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert not gather.call_args.args + assert gather.call_args.kwargs == {"return_exceptions": True} + # test with instances + client._active_instances = [ + "instance-1", + "instance-2", + "instance-3", + "instance-4", + ] + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + await client._ping_and_warm_instances(channel) + gather.assert_called_once() + gather.assert_awaited_once() + assert len(gather.call_args.args) == 4 + assert gather.call_args.kwargs == {"return_exceptions": True} + for idx, call in enumerate(gather.call_args.args): + assert isinstance(call, grpc.aio.UnaryUnaryCall) + call._request["name"] = client._active_instances[idx] + await client.close() -@pytest.mark.asyncio -async def test__manage_channel_ping_and_warm(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, + @pytest.mark.asyncio + @pytest.mark.parametrize( + "refresh_interval, wait_time, expected_sleep", + [ + (0, 0, 0), + (0, 1, 0), + (10, 0, 10), + (10, 5, 5), + (10, 10, 0), + (10, 15, 0), + ], ) + async def test__manage_channel_first_sleep( + self, refresh_interval, wait_time, expected_sleep + ): + # first sleep time should be `refresh_interval` seconds after client init + import time - # should ping an warm all new channels, and old channels if sleeping - client = _make_one(project="project-id") - new_channel = grpc.aio.insecure_channel("localhost:8080") - with mock.patch.object(asyncio, "sleep"): - create_channel = mock.Mock() - create_channel.return_value = new_channel - client.transport.grpc_channel._create_channel = create_channel - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - replace_channel.side_effect = asyncio.CancelledError - # should ping and warm old channel then new if sleep > 0 - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - channel_idx = 2 - old_channel = client.transport._grpc_channel._pool[channel_idx] - await client._manage_channel(channel_idx, 10) - except asyncio.CancelledError: - pass - assert ping_and_warm.call_count == 2 - assert old_channel != new_channel - called_with = [call[0][0] for call in ping_and_warm.call_args_list] - assert old_channel in called_with - assert new_channel in called_with - # should ping and warm instantly new channel only if not sleeping - with mock.patch.object( - type(_make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - await client._manage_channel(0, 0, 0) - except asyncio.CancelledError: - pass - 
ping_and_warm.assert_called_once_with(new_channel) - await client.close() - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "refresh_interval, num_cycles, expected_sleep", - [ - (None, 1, 60 * 35), - (10, 10, 100), - (10, 1, 10), - ], -) -async def test__manage_channel_sleeps(refresh_interval, num_cycles, expected_sleep): - # make sure that sleeps work as expected - import time - import random - - channel_idx = 1 - with mock.patch.object(random, "uniform") as uniform: - uniform.side_effect = lambda min_, max_: min_ with mock.patch.object(time, "time") as time: time.return_value = 0 with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles - 1)] + [ - asyncio.CancelledError - ] + sleep.side_effect = asyncio.CancelledError try: - client = _make_one(project="project-id") - if refresh_interval is not None: - await client._manage_channel( - channel_idx, refresh_interval, refresh_interval - ) - else: - await client._manage_channel(channel_idx) + client = self._make_one(project="project-id") + client._channel_init_time = -wait_time + await client._manage_channel(0, refresh_interval, refresh_interval) except asyncio.CancelledError: pass - assert sleep.call_count == num_cycles - total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + sleep.assert_called_once() + call_time = sleep.call_args[0][0] assert ( - abs(total_sleep - expected_sleep) < 0.1 - ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" - await client.close() - + abs(call_time - expected_sleep) < 0.1 + ), f"refresh_interval: {refresh_interval}, wait_time: {wait_time}, expected_sleep: {expected_sleep}" + await client.close() + + @pytest.mark.asyncio + async def test__manage_channel_ping_and_warm(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) -@pytest.mark.asyncio -async def test__manage_channel_random(): - import random + # should ping an warm all new channels, and old channels if sleeping + client = self._make_one(project="project-id") + new_channel = grpc.aio.insecure_channel("localhost:8080") + with mock.patch.object(asyncio, "sleep"): + create_channel = mock.Mock() + create_channel.return_value = new_channel + client.transport.grpc_channel._create_channel = create_channel + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + replace_channel.side_effect = asyncio.CancelledError + # should ping and warm old channel then new if sleep > 0 + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + channel_idx = 2 + old_channel = client.transport._grpc_channel._pool[channel_idx] + await client._manage_channel(channel_idx, 10) + except asyncio.CancelledError: + pass + assert ping_and_warm.call_count == 2 + assert old_channel != new_channel + called_with = [call[0][0] for call in ping_and_warm.call_args_list] + assert old_channel in called_with + assert new_channel in called_with + # should ping and warm instantly new channel only if not sleeping + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_and_warm: + try: + await client._manage_channel(0, 0, 0) + except asyncio.CancelledError: + pass + ping_and_warm.assert_called_once_with(new_channel) + await client.close() - with mock.patch.object(asyncio, "sleep") as sleep: + @pytest.mark.asyncio + @pytest.mark.parametrize( + "refresh_interval, num_cycles, 
expected_sleep", + [ + (None, 1, 60 * 35), + (10, 10, 100), + (10, 1, 10), + ], + ) + async def test__manage_channel_sleeps( + self, refresh_interval, num_cycles, expected_sleep + ): + # make sure that sleeps work as expected + import time + import random + + channel_idx = 1 with mock.patch.object(random, "uniform") as uniform: - uniform.return_value = 0 - try: - uniform.side_effect = asyncio.CancelledError - client = _make_one(project="project-id", pool_size=1) - except asyncio.CancelledError: - uniform.side_effect = None - uniform.reset_mock() - sleep.reset_mock() - min_val = 200 - max_val = 205 uniform.side_effect = lambda min_, max_: min_ - sleep.side_effect = [None, None, asyncio.CancelledError] - try: - await client._manage_channel(0, min_val, max_val) - except asyncio.CancelledError: - pass - assert uniform.call_count == 2 - uniform_args = [call[0] for call in uniform.call_args_list] - for found_min, found_max in uniform_args: - assert found_min == min_val - assert found_max == max_val - - -@pytest.mark.asyncio -@pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) -async def test__manage_channel_refresh(num_cycles): - # make sure that channels are properly refreshed - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) - from google.api_core import grpc_helpers_async + with mock.patch.object(time, "time") as time: + time.return_value = 0 + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles - 1)] + [ + asyncio.CancelledError + ] + try: + client = self._make_one(project="project-id") + if refresh_interval is not None: + await client._manage_channel( + channel_idx, refresh_interval, refresh_interval + ) + else: + await client._manage_channel(channel_idx) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + total_sleep = sum([call[0][0] for call in sleep.call_args_list]) + assert ( + abs(total_sleep - expected_sleep) < 0.1 + ), f"refresh_interval={refresh_interval}, num_cycles={num_cycles}, expected_sleep={expected_sleep}" + await client.close() - expected_grace = 9 - expected_refresh = 0.5 - channel_idx = 1 - new_channel = grpc.aio.insecure_channel("localhost:8080") + @pytest.mark.asyncio + async def test__manage_channel_random(self): + import random - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: with mock.patch.object(asyncio, "sleep") as sleep: - sleep.side_effect = [None for i in range(num_cycles)] + [ - asyncio.CancelledError - ] - with mock.patch.object( - grpc_helpers_async, "create_channel" - ) as create_channel: - create_channel.return_value = new_channel - client = _make_one(project="project-id") - create_channel.reset_mock() + with mock.patch.object(random, "uniform") as uniform: + uniform.return_value = 0 try: - await client._manage_channel( - channel_idx, - refresh_interval_min=expected_refresh, - refresh_interval_max=expected_refresh, - grace_period=expected_grace, - ) + uniform.side_effect = asyncio.CancelledError + client = self._make_one(project="project-id", pool_size=1) + except asyncio.CancelledError: + uniform.side_effect = None + uniform.reset_mock() + sleep.reset_mock() + min_val = 200 + max_val = 205 + uniform.side_effect = lambda min_, max_: min_ + sleep.side_effect = [None, None, asyncio.CancelledError] + try: + await client._manage_channel(0, min_val, max_val) except asyncio.CancelledError: pass - assert sleep.call_count == num_cycles + 1 - 
assert create_channel.call_count == num_cycles - assert replace_channel.call_count == num_cycles - for call in replace_channel.call_args_list: - args, kwargs = call - assert args[0] == channel_idx - assert kwargs["grace"] == expected_grace - assert kwargs["new_channel"] == new_channel - await client.close() + assert uniform.call_count == 2 + uniform_args = [call[0] for call in uniform.call_args_list] + for found_min, found_max in uniform_args: + assert found_min == min_val + assert found_max == max_val + + @pytest.mark.asyncio + @pytest.mark.parametrize("num_cycles", [0, 1, 10, 100]) + async def test__manage_channel_refresh(self, num_cycles): + # make sure that channels are properly refreshed + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) + from google.api_core import grpc_helpers_async + + expected_grace = 9 + expected_refresh = 0.5 + channel_idx = 1 + new_channel = grpc.aio.insecure_channel("localhost:8080") + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "replace_channel" + ) as replace_channel: + with mock.patch.object(asyncio, "sleep") as sleep: + sleep.side_effect = [None for i in range(num_cycles)] + [ + asyncio.CancelledError + ] + with mock.patch.object( + grpc_helpers_async, "create_channel" + ) as create_channel: + create_channel.return_value = new_channel + client = self._make_one(project="project-id") + create_channel.reset_mock() + try: + await client._manage_channel( + channel_idx, + refresh_interval_min=expected_refresh, + refresh_interval_max=expected_refresh, + grace_period=expected_grace, + ) + except asyncio.CancelledError: + pass + assert sleep.call_count == num_cycles + 1 + assert create_channel.call_count == num_cycles + assert replace_channel.call_count == num_cycles + for call in replace_channel.call_args_list: + args, kwargs = call + assert args[0] == channel_idx + assert kwargs["grace"] == expected_grace + assert kwargs["new_channel"] == new_channel + await client.close() + + @pytest.mark.asyncio + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + async def test__register_instance(self): + # create the client without calling start_background_channel_refresh + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = self._make_one(project="project-id") + assert not client._channel_refresh_tasks + # first call should start background refresh + assert client._active_instances == set() + await client._register_instance("instance-1", mock.Mock()) + assert len(client._active_instances) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-1"} + assert client._channel_refresh_tasks + # next call should not + with mock.patch.object( + type(self._make_one()), "start_background_channel_refresh" + ) as refresh_mock: + await client._register_instance("instance-2", mock.Mock()) + assert len(client._active_instances) == 2 + assert client._active_instances == { + "projects/project-id/instances/instance-1", + "projects/project-id/instances/instance-2", + } + refresh_mock.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + async def test__register_instance_ping_and_warm(self): + # should ping and warm each new instance + pool_size = 7 + with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: + get_event_loop.side_effect = RuntimeError("no event loop") + client = 
self._make_one(project="project-id", pool_size=pool_size) + # first call should start background refresh + assert not client._channel_refresh_tasks + await client._register_instance("instance-1", mock.Mock()) + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + assert not client._active_instances + # next calls should trigger ping and warm + with mock.patch.object( + type(self._make_one()), "_ping_and_warm_instances" + ) as ping_mock: + # new instance should trigger ping and warm + await client._register_instance("instance-2", mock.Mock()) + assert ping_mock.call_count == pool_size + await client._register_instance("instance-3", mock.Mock()) + assert ping_mock.call_count == pool_size * 2 + # duplcate instances should not trigger ping and warm + await client._register_instance("instance-3", mock.Mock()) + assert ping_mock.call_count == pool_size * 2 + await client.close() -@pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test__register_instance(): - # create the client without calling start_background_channel_refresh - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = _make_one(project="project-id") - assert not client._channel_refresh_tasks - # first call should start background refresh - assert client._active_instances == set() - await client._register_instance("instance-1", mock.Mock()) - assert len(client._active_instances) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-1"} - assert client._channel_refresh_tasks - # next call should not - with mock.patch.object( - type(_make_one()), "start_background_channel_refresh" - ) as refresh_mock: - await client._register_instance("instance-2", mock.Mock()) + @pytest.mark.asyncio + async def test__remove_instance_registration(self): + client = self._make_one(project="project-id") + table = mock.Mock() + await client._register_instance("instance-1", table) + await client._register_instance("instance-2", table) assert len(client._active_instances) == 2 - assert client._active_instances == { - "projects/project-id/instances/instance-1", - "projects/project-id/instances/instance-2", - } - refresh_mock.assert_not_called() - - -@pytest.mark.asyncio -@pytest.mark.filterwarnings("ignore::RuntimeWarning") -async def test__register_instance_ping_and_warm(): - # should ping and warm each new instance - pool_size = 7 - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = _make_one(project="project-id", pool_size=pool_size) - # first call should start background refresh - assert not client._channel_refresh_tasks - await client._register_instance("instance-1", mock.Mock()) - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - assert not client._active_instances - # next calls should trigger ping and warm - with mock.patch.object(type(_make_one()), "_ping_and_warm_instances") as ping_mock: - # new instance should trigger ping and warm - await client._register_instance("instance-2", mock.Mock()) - assert ping_mock.call_count == pool_size - await client._register_instance("instance-3", mock.Mock()) - assert ping_mock.call_count == pool_size * 2 - # duplcate instances should not trigger ping and warm - await client._register_instance("instance-3", mock.Mock()) - assert 
ping_mock.call_count == pool_size * 2 - await client.close() - - -@pytest.mark.asyncio -async def test__remove_instance_registration(): - client = _make_one(project="project-id") - table = mock.Mock() - await client._register_instance("instance-1", table) - await client._register_instance("instance-2", table) - assert len(client._active_instances) == 2 - assert len(client._instance_owners.keys()) == 2 - instance_1_path = client._gapic_client.instance_path(client.project, "instance-1") - instance_2_path = client._gapic_client.instance_path(client.project, "instance-2") - assert len(client._instance_owners[instance_1_path]) == 1 - assert list(client._instance_owners[instance_1_path])[0] == id(table) - assert len(client._instance_owners[instance_2_path]) == 1 - assert list(client._instance_owners[instance_2_path])[0] == id(table) - success = await client._remove_instance_registration("instance-1", table) - assert success - assert len(client._active_instances) == 1 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-2"} - success = await client._remove_instance_registration("nonexistant", table) - assert not success - assert len(client._active_instances) == 1 - await client.close() - - -@pytest.mark.asyncio -async def test__multiple_table_registration(): - async with _make_one(project="project-id") as client: - async with client.get_table("instance_1", "table_1") as table_1: - instance_1_path = client._gapic_client.instance_path( - client.project, "instance_1" - ) - assert len(client._instance_owners[instance_1_path]) == 1 - assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - async with client.get_table("instance_1", "table_2") as table_2: - assert len(client._instance_owners[instance_1_path]) == 2 - assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_1_path] - # table_2 should be unregistered, but instance should still be active - assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances - assert id(table_2) not in client._instance_owners[instance_1_path] - # both tables are gone. 
instance should be unregistered - assert len(client._active_instances) == 0 - assert instance_1_path not in client._active_instances + assert len(client._instance_owners.keys()) == 2 + instance_1_path = client._gapic_client.instance_path( + client.project, "instance-1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance-2" + ) + assert len(client._instance_owners[instance_1_path]) == 1 + assert list(client._instance_owners[instance_1_path])[0] == id(table) + assert len(client._instance_owners[instance_2_path]) == 1 + assert list(client._instance_owners[instance_2_path])[0] == id(table) + success = await client._remove_instance_registration("instance-1", table) + assert success + assert len(client._active_instances) == 1 assert len(client._instance_owners[instance_1_path]) == 0 + assert len(client._instance_owners[instance_2_path]) == 1 + assert client._active_instances == {"projects/project-id/instances/instance-2"} + success = await client._remove_instance_registration("nonexistant", table) + assert not success + assert len(client._active_instances) == 1 + await client.close() - -@pytest.mark.asyncio -async def test__multiple_instance_registration(): - async with _make_one(project="project-id") as client: - async with client.get_table("instance_1", "table_1") as table_1: - async with client.get_table("instance_2", "table_2") as table_2: + @pytest.mark.asyncio + async def test__multiple_table_registration(self): + async with self._make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: instance_1_path = client._gapic_client.instance_path( client.project, "instance_1" ) - instance_2_path = client._gapic_client.instance_path( - client.project, "instance_2" - ) assert len(client._instance_owners[instance_1_path]) == 1 - assert len(client._instance_owners[instance_2_path]) == 1 - assert len(client._active_instances) == 2 + assert len(client._active_instances) == 1 + assert id(table_1) in client._instance_owners[instance_1_path] + async with client.get_table("instance_1", "table_2") as table_2: + assert len(client._instance_owners[instance_1_path]) == 2 + assert len(client._active_instances) == 1 + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_1_path] + # table_2 should be unregistered, but instance should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert id(table_2) not in client._instance_owners[instance_1_path] + # both tables are gone. 
instance should be unregistered + assert len(client._active_instances) == 0 + assert instance_1_path not in client._active_instances + assert len(client._instance_owners[instance_1_path]) == 0 + + @pytest.mark.asyncio + async def test__multiple_instance_registration(self): + async with self._make_one(project="project-id") as client: + async with client.get_table("instance_1", "table_1") as table_1: + async with client.get_table("instance_2", "table_2") as table_2: + instance_1_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) + instance_2_path = client._gapic_client.instance_path( + client.project, "instance_2" + ) + assert len(client._instance_owners[instance_1_path]) == 1 + assert len(client._instance_owners[instance_2_path]) == 1 + assert len(client._active_instances) == 2 + assert id(table_1) in client._instance_owners[instance_1_path] + assert id(table_2) in client._instance_owners[instance_2_path] + # instance2 should be unregistered, but instance1 should still be active + assert len(client._active_instances) == 1 + assert instance_1_path in client._active_instances + assert len(client._instance_owners[instance_2_path]) == 0 + assert len(client._instance_owners[instance_1_path]) == 1 assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_2_path] - # instance2 should be unregistered, but instance1 should still be active - assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances + # both tables are gone. instances should both be unregistered + assert len(client._active_instances) == 0 + assert len(client._instance_owners[instance_1_path]) == 0 assert len(client._instance_owners[instance_2_path]) == 0 - assert len(client._instance_owners[instance_1_path]) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - # both tables are gone. 
instances should both be unregistered - assert len(client._active_instances) == 0 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 0 - - -@pytest.mark.asyncio -async def test_get_table(): - from google.cloud.bigtable.client import Table - - client = _make_one(project="project-id") - assert not client._active_instances - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - table = client.get_table( - expected_instance_id, - expected_table_id, - expected_app_profile_id, - ) - await asyncio.sleep(0) - assert isinstance(table, Table) - assert table.table_id == expected_table_id - assert ( - table.table_name - == f"projects/{client.project}/instances/{expected_instance_id}/tables/{expected_table_id}" - ) - assert table.instance_id == expected_instance_id - assert ( - table.instance_name - == f"projects/{client.project}/instances/{expected_instance_id}" - ) - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - await client.close() - - -@pytest.mark.asyncio -async def test_get_table_context_manager(): - from google.cloud.bigtable.client import Table - - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - expected_project_id = "project-id" - - with mock.patch.object(Table, "close") as close_mock: - async with _make_one(project=expected_project_id) as client: - async with client.get_table( - expected_instance_id, - expected_table_id, - expected_app_profile_id, - ) as table: - await asyncio.sleep(0) - assert isinstance(table, Table) - assert table.table_id == expected_table_id - assert ( - table.table_name - == f"projects/{expected_project_id}/instances/{expected_instance_id}/tables/{expected_table_id}" - ) - assert table.instance_id == expected_instance_id - assert ( - table.instance_name - == f"projects/{expected_project_id}/instances/{expected_instance_id}" - ) - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - assert close_mock.call_count == 1 - - -@pytest.mark.asyncio -async def test_multiple_pool_sizes(): - # should be able to create multiple clients with different pool sizes without issue - pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] - for pool_size in pool_sizes: - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - client_duplicate = _make_one(project="project-id", pool_size=pool_size) - assert len(client_duplicate._channel_refresh_tasks) == pool_size - assert str(pool_size) in str(client.transport) + + @pytest.mark.asyncio + async def test_get_table(self): + from google.cloud.bigtable.client import Table + + client = self._make_one(project="project-id") + assert not client._active_instances + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + table = client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert ( + table.table_name + == f"projects/{client.project}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == expected_instance_id + assert ( + table.instance_name + == 
f"projects/{client.project}/instances/{expected_instance_id}" + ) + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances await client.close() - await client_duplicate.close() + @pytest.mark.asyncio + async def test_get_table_context_manager(self): + from google.cloud.bigtable.client import Table + + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_project_id = "project-id" + + with mock.patch.object(Table, "close") as close_mock: + async with self._make_one(project=expected_project_id) as client: + async with client.get_table( + expected_instance_id, + expected_table_id, + expected_app_profile_id, + ) as table: + await asyncio.sleep(0) + assert isinstance(table, Table) + assert table.table_id == expected_table_id + assert ( + table.table_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}/tables/{expected_table_id}" + ) + assert table.instance_id == expected_instance_id + assert ( + table.instance_name + == f"projects/{expected_project_id}/instances/{expected_instance_id}" + ) + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances + assert close_mock.call_count == 1 + + @pytest.mark.asyncio + async def test_multiple_pool_sizes(self): + # should be able to create multiple clients with different pool sizes without issue + pool_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] + for pool_size in pool_sizes: + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + client_duplicate = self._make_one(project="project-id", pool_size=pool_size) + assert len(client_duplicate._channel_refresh_tasks) == pool_size + assert str(pool_size) in str(client.transport) + await client.close() + await client_duplicate.close() -@pytest.mark.asyncio -async def test_close(): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) + @pytest.mark.asyncio + async def test_close(self): + from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( + PooledBigtableGrpcAsyncIOTransport, + ) - pool_size = 7 - client = _make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - tasks_list = list(client._channel_refresh_tasks) - for task in client._channel_refresh_tasks: - assert not task.done() - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock() - ) as close_mock: + pool_size = 7 + client = self._make_one(project="project-id", pool_size=pool_size) + assert len(client._channel_refresh_tasks) == pool_size + tasks_list = list(client._channel_refresh_tasks) + for task in client._channel_refresh_tasks: + assert not task.done() + with mock.patch.object( + PooledBigtableGrpcAsyncIOTransport, "close", AsyncMock() + ) as close_mock: + await client.close() + close_mock.assert_called_once() + close_mock.assert_awaited() + for task in tasks_list: + assert task.done() + assert task.cancelled() + assert client._channel_refresh_tasks == [] + + @pytest.mark.asyncio + async def test_close_with_timeout(self): + pool_size = 7 + expected_timeout = 19 + client = self._make_one(project="project-id", pool_size=pool_size) + tasks = list(client._channel_refresh_tasks) + with mock.patch.object(asyncio, "wait_for", 
AsyncMock()) as wait_for_mock: + await client.close(timeout=expected_timeout) + wait_for_mock.assert_called_once() + wait_for_mock.assert_awaited() + assert wait_for_mock.call_args[1]["timeout"] == expected_timeout + client._channel_refresh_tasks = tasks await client.close() + + @pytest.mark.asyncio + async def test_context_manager(self): + # context manager should close the client cleanly + close_mock = AsyncMock() + true_close = None + async with self._make_one(project="project-id") as client: + true_close = client.close() + client.close = close_mock + for task in client._channel_refresh_tasks: + assert not task.done() + assert client.project == "project-id" + assert client._active_instances == set() + close_mock.assert_not_called() close_mock.assert_called_once() close_mock.assert_awaited() - for task in tasks_list: - assert task.done() - assert task.cancelled() - assert client._channel_refresh_tasks == [] - - -@pytest.mark.asyncio -async def test_close_with_timeout(): - pool_size = 7 - expected_timeout = 19 - client = _make_one(project="project-id", pool_size=pool_size) - tasks = list(client._channel_refresh_tasks) - with mock.patch.object(asyncio, "wait_for", AsyncMock()) as wait_for_mock: - await client.close(timeout=expected_timeout) - wait_for_mock.assert_called_once() - wait_for_mock.assert_awaited() - assert wait_for_mock.call_args[1]["timeout"] == expected_timeout - client._channel_refresh_tasks = tasks - await client.close() - - -@pytest.mark.asyncio -async def test_context_manager(): - # context manager should close the client cleanly - close_mock = AsyncMock() - true_close = None - async with _make_one(project="project-id") as client: - true_close = client.close() - client.close = close_mock - for task in client._channel_refresh_tasks: - assert not task.done() + # actually close the client + await true_close + + def test_client_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import BigtableDataClient + + with pytest.warns(RuntimeWarning) as warnings: + client = BigtableDataClient(project="project-id") + expected_warning = [w for w in warnings if "client.py" in w.filename] + assert len(expected_warning) == 1 + assert "BigtableDataClient should be started in an asyncio event loop." in str( + expected_warning[0].message + ) assert client.project == "project-id" - assert client._active_instances == set() - close_mock.assert_not_called() - close_mock.assert_called_once() - close_mock.assert_awaited() - # actually close the client - await true_close - - -def test_client_ctor_sync(): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import BigtableDataClient - - with pytest.warns(RuntimeWarning) as warnings: - client = BigtableDataClient(project="project-id") - expected_warning = [w for w in warnings if "client.py" in w.filename] - assert len(expected_warning) == 1 - assert "BigtableDataClient should be started in an asyncio event loop." 
in str( - expected_warning[0].message - ) - assert client.project == "project-id" - assert client._channel_refresh_tasks == [] - - -###################################################################### -# Table Tests -###################################################################### - - -@pytest.mark.asyncio -async def test_table_ctor(): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table - - expected_table_id = "table-id" - expected_instance_id = "instance-id" - expected_app_profile_id = "app-profile-id" - expected_operation_timeout = 123 - expected_per_row_timeout = 21 - expected_per_request_timeout = 12 - client = BigtableDataClient() - assert not client._active_instances - - table = Table( - client, - expected_instance_id, - expected_table_id, - expected_app_profile_id, - default_operation_timeout=expected_operation_timeout, - default_per_row_timeout=expected_per_row_timeout, - default_per_request_timeout=expected_per_request_timeout, - ) - await asyncio.sleep(0) - assert table.table_id == expected_table_id - assert table.instance_id == expected_instance_id - assert table.app_profile_id == expected_app_profile_id - assert table.client is client - assert table.instance_name in client._active_instances - assert table.default_operation_timeout == expected_operation_timeout - assert table.default_per_row_timeout == expected_per_row_timeout - assert table.default_per_request_timeout == expected_per_request_timeout - # ensure task reaches completion - await table._register_instance_task - assert table._register_instance_task.done() - assert not table._register_instance_task.cancelled() - assert table._register_instance_task.exception() is None - await client.close() - - -@pytest.mark.asyncio -async def test_table_ctor_bad_timeout_values(): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table - - client = BigtableDataClient() - - with pytest.raises(ValueError) as e: - Table(client, "", "", default_per_row_timeout=-1) - assert "default_per_row_timeout must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - Table(client, "", "", default_per_request_timeout=-1) - assert "default_per_request_timeout must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - Table(client, "", "", default_operation_timeout=-1) - assert "default_operation_timeout must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - Table( - client, "", "", default_operation_timeout=1, default_per_request_timeout=2 + assert client._channel_refresh_tasks == [] + + +class TestTable: + @pytest.mark.asyncio + async def test_table_ctor(self): + from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + + expected_table_id = "table-id" + expected_instance_id = "instance-id" + expected_app_profile_id = "app-profile-id" + expected_operation_timeout = 123 + expected_per_row_timeout = 21 + expected_per_request_timeout = 12 + client = BigtableDataClient() + assert not client._active_instances + + table = Table( + client, + expected_instance_id, + expected_table_id, + expected_app_profile_id, + default_operation_timeout=expected_operation_timeout, + default_per_row_timeout=expected_per_row_timeout, + default_per_request_timeout=expected_per_request_timeout, ) - assert ( - "default_per_request_timeout must be less than default_operation_timeout" - in str(e.value) - ) - await client.close() + await 
asyncio.sleep(0) + assert table.table_id == expected_table_id + assert table.instance_id == expected_instance_id + assert table.app_profile_id == expected_app_profile_id + assert table.client is client + assert table.instance_name in client._active_instances + assert table.default_operation_timeout == expected_operation_timeout + assert table.default_per_row_timeout == expected_per_row_timeout + assert table.default_per_request_timeout == expected_per_request_timeout + # ensure task reaches completion + await table._register_instance_task + assert table._register_instance_task.done() + assert not table._register_instance_task.cancelled() + assert table._register_instance_task.exception() is None + await client.close() + @pytest.mark.asyncio + async def test_table_ctor_bad_timeout_values(self): + from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.client import Table + + client = BigtableDataClient() + + with pytest.raises(ValueError) as e: + Table(client, "", "", default_per_row_timeout=-1) + assert "default_per_row_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + Table(client, "", "", default_per_request_timeout=-1) + assert "default_per_request_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + Table(client, "", "", default_operation_timeout=-1) + assert "default_operation_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + Table( + client, + "", + "", + default_operation_timeout=1, + default_per_request_timeout=2, + ) + assert ( + "default_per_request_timeout must be less than default_operation_timeout" + in str(e.value) + ) + await client.close() -def test_table_ctor_sync(): - # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import Table + def test_table_ctor_sync(self): + # initializing client in a sync context should raise RuntimeError + from google.cloud.bigtable.client import Table - client = mock.Mock() - with pytest.raises(RuntimeError) as e: - Table(client, "instance-id", "table-id") - assert e.match("Table must be created within an async event loop context.") + client = mock.Mock() + with pytest.raises(RuntimeError) as e: + Table(client, "instance-id", "table-id") + assert e.match("Table must be created within an async event loop context.") From 72eca75f39a6d46fea7757b4f502a935da7d2e45 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:39:47 -0700 Subject: [PATCH 327/349] restructured test_client_read_rows --- tests/unit/test_client_read_rows.py | 940 ++++++++++++++-------------- 1 file changed, 487 insertions(+), 453 deletions(-) diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py index 1ec4c93a7..70f544503 100644 --- a/tests/unit/test_client_read_rows.py +++ b/tests/unit/test_client_read_rows.py @@ -32,495 +32,529 @@ from mock import AsyncMock # type: ignore -def _make_client(*args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient(*args, **kwargs) - - -def _make_stats(): - from google.cloud.bigtable_v2.types import RequestStats - from google.cloud.bigtable_v2.types import FullReadStatsView - from google.cloud.bigtable_v2.types import ReadIterationStats - - return RequestStats( - full_read_stats_view=FullReadStatsView( - read_iteration_stats=ReadIterationStats( - rows_seen_count=1, - rows_returned_count=2, - cells_seen_count=3, - cells_returned_count=4, +class 
TestReadRows: + def _make_client(self, *args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient(*args, **kwargs) + + def _make_stats(self): + from google.cloud.bigtable_v2.types import RequestStats + from google.cloud.bigtable_v2.types import FullReadStatsView + from google.cloud.bigtable_v2.types import ReadIterationStats + + return RequestStats( + full_read_stats_view=FullReadStatsView( + read_iteration_stats=ReadIterationStats( + rows_seen_count=1, + rows_returned_count=2, + cells_seen_count=3, + cells_returned_count=4, + ) ) ) - ) - -def _make_chunk(*args, **kwargs): - from google.cloud.bigtable_v2 import ReadRowsResponse - - kwargs["row_key"] = kwargs.get("row_key", b"row_key") - kwargs["family_name"] = kwargs.get("family_name", "family_name") - kwargs["qualifier"] = kwargs.get("qualifier", b"qualifier") - kwargs["value"] = kwargs.get("value", b"value") - kwargs["commit_row"] = kwargs.get("commit_row", True) - - return ReadRowsResponse.CellChunk(*args, **kwargs) - - -async def _make_gapic_stream( - chunk_list: list[ReadRowsResponse.CellChunk | Exception], - request_stats: RequestStats | None = None, - sleep_time=0, -): - from google.cloud.bigtable_v2 import ReadRowsResponse - - async def inner(): - for chunk in chunk_list: - if sleep_time: - await asyncio.sleep(sleep_time) - if isinstance(chunk, Exception): - raise chunk - else: - yield ReadRowsResponse(chunks=[chunk]) - if request_stats: - yield ReadRowsResponse(request_stats=request_stats) - - return inner() - - -@pytest.mark.asyncio -async def test_read_rows(): - client = _make_client() - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) - results = await table.read_rows(query, operation_timeout=3) - assert len(results) == 2 - assert results[0].row_key == b"test_1" - assert results[1].row_key == b"test_2" - await client.close() - - -@pytest.mark.asyncio -async def test_read_rows_stream(): - client = _make_client() - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) - gen = await table.read_rows_stream(query, operation_timeout=3) - results = [row async for row in gen] - assert len(results) == 2 - assert results[0].row_key == b"test_1" - assert results[1].row_key == b"test_2" - await client.close() - - -@pytest.mark.parametrize("include_app_profile", [True, False]) -@pytest.mark.asyncio -async def test_read_rows_query_matches_request(include_app_profile): - from google.cloud.bigtable import RowRange - - async with _make_client() as client: - app_profile_id = "app_profile_id" if include_app_profile else None - table = client.get_table("instance", "table", app_profile_id=app_profile_id) - row_keys = [b"test_1", "test_2"] - row_ranges = RowRange("start", "end") - filter_ = {"test": "filter"} - limit = 99 - query = ReadRowsQuery( - row_keys=row_keys, row_ranges=row_ranges, row_filter=filter_, limit=limit - ) - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream([]) - results = await 
table.read_rows(query, operation_timeout=3) - assert len(results) == 0 - call_request = read_rows.call_args_list[0][0][0] - query_dict = query._to_dict() - if include_app_profile: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name", - "app_profile_id", - } - else: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name" - } - assert call_request["rows"] == query_dict["rows"] - assert call_request["filter"] == filter_ - assert call_request["rows_limit"] == limit - assert call_request["table_name"] == table.table_name - if include_app_profile: - assert call_request["app_profile_id"] == app_profile_id - - -@pytest.mark.parametrize( - "input_buffer_size, expected_buffer_size", - [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], -) -@pytest.mark.asyncio -async def test_read_rows_buffer_size(input_buffer_size, expected_buffer_size): - async with _make_client() as client: + def _make_chunk(self, *args, **kwargs): + from google.cloud.bigtable_v2 import ReadRowsResponse + + kwargs["row_key"] = kwargs.get("row_key", b"row_key") + kwargs["family_name"] = kwargs.get("family_name", "family_name") + kwargs["qualifier"] = kwargs.get("qualifier", b"qualifier") + kwargs["value"] = kwargs.get("value", b"value") + kwargs["commit_row"] = kwargs.get("commit_row", True) + + return ReadRowsResponse.CellChunk(*args, **kwargs) + + async def _make_gapic_stream( + self, + chunk_list: list[ReadRowsResponse.CellChunk | Exception], + request_stats: RequestStats | None = None, + sleep_time=0, + ): + from google.cloud.bigtable_v2 import ReadRowsResponse + + async def inner(): + for chunk in chunk_list: + if sleep_time: + await asyncio.sleep(sleep_time) + if isinstance(chunk, Exception): + raise chunk + else: + yield ReadRowsResponse(chunks=[chunk]) + if request_stats: + yield ReadRowsResponse(request_stats=request_stats) + + return inner() + + @pytest.mark.asyncio + async def test_read_rows(self): + client = self._make_client() table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) - with mock.patch.object(asyncio, "Queue") as queue: - queue.side_effect = asyncio.CancelledError - try: - gen = await table.read_rows_stream( - query, operation_timeout=3, buffer_size=input_buffer_size - ) - [row async for row in gen] - except asyncio.CancelledError: - pass - queue.assert_called_once_with(maxsize=expected_buffer_size) - + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 2 + assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" + await client.close() -@pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) -@pytest.mark.asyncio -async def test_read_rows_timeout(operation_timeout): - async with _make_client() as client: + @pytest.mark.asyncio + async def test_read_rows_stream(self): + client = self._make_client() table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - 
read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, sleep_time=1 + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks ) - try: - await table.read_rows(query, operation_timeout=operation_timeout) - except core_exceptions.DeadlineExceeded as e: - assert ( - e.message - == f"operation_timeout of {operation_timeout:0.1f}s exceeded" - ) - + gen = await table.read_rows_stream(query, operation_timeout=3) + results = [row async for row in gen] + assert len(results) == 2 + assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" + await client.close() -@pytest.mark.parametrize( - "per_row_t, operation_t, expected_num", - [ - (0.1, 0.01, 0), - (0.1, 0.19, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), - ], -) -@pytest.mark.asyncio -async def test_read_rows_per_row_timeout(per_row_t, operation_t, expected_num): - from google.cloud.bigtable.exceptions import RetryExceptionGroup - - # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with _make_client() as client: + @pytest.mark.parametrize("include_app_profile", [True, False]) + @pytest.mark.asyncio + async def test_read_rows_query_matches_request(self, include_app_profile): + from google.cloud.bigtable import RowRange + + async with self._make_client() as client: + app_profile_id = "app_profile_id" if include_app_profile else None + table = client.get_table("instance", "table", app_profile_id=app_profile_id) + row_keys = [b"test_1", "test_2"] + row_ranges = RowRange("start", "end") + filter_ = {"test": "filter"} + limit = 99 + query = ReadRowsQuery( + row_keys=row_keys, + row_ranges=row_ranges, + row_filter=filter_, + limit=limit, + ) + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [] + ) + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 0 + call_request = read_rows.call_args_list[0][0][0] + query_dict = query._to_dict() + if include_app_profile: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name", + "app_profile_id", + } + else: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name" + } + assert call_request["rows"] == query_dict["rows"] + assert call_request["filter"] == filter_ + assert call_request["rows_limit"] == limit + assert call_request["table_name"] == table.table_name + if include_app_profile: + assert call_request["app_profile_id"] == app_profile_id + + @pytest.mark.parametrize( + "input_buffer_size, expected_buffer_size", + [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], + ) + @pytest.mark.asyncio + async def test_read_rows_buffer_size(self, input_buffer_size, expected_buffer_size): + async with self._make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] + chunks = [self._make_chunk(row_key=b"test_1")] with mock.patch.object( table.client._gapic_client, "read_rows" ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, sleep_time=5 + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + with mock.patch.object(asyncio, "Queue") as queue: + queue.side_effect = asyncio.CancelledError + try: + gen = await table.read_rows_stream( + query, operation_timeout=3, buffer_size=input_buffer_size 
+ ) + [row async for row in gen] + except asyncio.CancelledError: + pass + queue.assert_called_once_with(maxsize=expected_buffer_size) + + @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) + @pytest.mark.asyncio + async def test_read_rows_timeout(self, operation_timeout): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=1 ) try: - await table.read_rows( - query, per_row_timeout=per_row_t, operation_timeout=operation_t + await table.read_rows(query, operation_timeout=operation_timeout) + except core_exceptions.DeadlineExceeded as e: + assert ( + e.message + == f"operation_timeout of {operation_timeout:0.1f}s exceeded" ) - except core_exceptions.DeadlineExceeded as deadline_exc: - retry_exc = deadline_exc.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert ( - sub_exc.message - == f"per_row_timeout of {per_row_t:0.1f}s exceeded" - ) - -@pytest.mark.parametrize( - "per_request_t, operation_t, expected_num", - [ - (0.05, 0.09, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), - ], -) -@pytest.mark.asyncio -async def test_read_rows_per_request_timeout(per_request_t, operation_t, expected_num): - from google.cloud.bigtable.exceptions import RetryExceptionGroup - - # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with _make_client() as client: + @pytest.mark.parametrize( + "per_row_t, operation_t, expected_num", + [ + (0.1, 0.01, 0), + (0.1, 0.19, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.24, 4), + ], + ) + @pytest.mark.asyncio + async def test_read_rows_per_row_timeout( + self, per_row_t, operation_t, expected_num + ): + from google.cloud.bigtable.exceptions import RetryExceptionGroup + + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a, b: 0): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=5 + ) + ) + try: + await table.read_rows( + query, + per_row_timeout=per_row_t, + operation_timeout=operation_t, + ) + except core_exceptions.DeadlineExceeded as deadline_exc: + retry_exc = deadline_exc.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert ( + sub_exc.message + == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + ) + + @pytest.mark.parametrize( + "per_request_t, operation_t, expected_num", + [ + (0.05, 0.09, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.24, 4), + ], + ) + @pytest.mark.asyncio + async def test_read_rows_per_request_timeout( + self, 
per_request_t, operation_t, expected_num + ): + from google.cloud.bigtable.exceptions import RetryExceptionGroup + + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a, b: 0): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [core_exceptions.DeadlineExceeded("mock deadline")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=per_request_t + ) + ) + try: + await table.read_rows( + query, + operation_timeout=operation_t, + per_request_timeout=per_request_t, + ) + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == "mock deadline" + assert read_rows.call_count == expected_num + 1 + called_kwargs = read_rows.call_args[1] + assert called_kwargs["timeout"] == per_request_t + + @pytest.mark.asyncio + async def test_read_rows_idle_timeout(self): + from google.cloud.bigtable.client import ReadRowsIterator + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.cloud.bigtable.exceptions import IdleTimeout + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] + with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + with mock.patch.object( + ReadRowsIterator, "_start_idle_timer" + ) as start_idle_timer: + client = self._make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + gen = await table.read_rows_stream(query) + # should start idle timer on creation + start_idle_timer.assert_called_once() + with mock.patch.object(_ReadRowsOperation, "aclose", AsyncMock()) as aclose: + # start idle timer with our own value + await gen._start_idle_timer(0.1) + # should timeout after being abandoned + await gen.__anext__() + await asyncio.sleep(0.2) + # generator should be expired + assert not gen.active + assert type(gen._merger_or_error) == IdleTimeout + assert gen._idle_timeout_task is None + await client.close() + with pytest.raises(IdleTimeout) as e: + await gen.__anext__() + + expected_msg = ( + "Timed out waiting for next Row to be consumed. 
(idle_timeout=0.1s)" + ) + assert e.value.message == expected_msg + aclose.assert_called_once() + aclose.assert_awaited() + + @pytest.mark.parametrize( + "exc_type", + [ + core_exceptions.Aborted, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ], + ) + @pytest.mark.asyncio + async def test_read_rows_retryable_error(self, exc_type): + async with self._make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() - chunks = [core_exceptions.DeadlineExceeded("mock deadline")] + expected_error = exc_type("mock error") with mock.patch.object( table.client._gapic_client, "read_rows" ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, sleep_time=per_request_t + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] ) try: - await table.read_rows( - query, - operation_timeout=operation_t, - per_request_timeout=per_request_t, - ) + await table.read_rows(query, operation_timeout=0.1) except core_exceptions.DeadlineExceeded as e: retry_exc = e.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert sub_exc.message == "mock deadline" - assert read_rows.call_count == expected_num + 1 - called_kwargs = read_rows.call_args[1] - assert called_kwargs["timeout"] == per_request_t - - -@pytest.mark.asyncio -async def test_read_rows_idle_timeout(): - from google.cloud.bigtable.client import ReadRowsIterator - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, + root_cause = retry_exc.exceptions[0] + assert type(root_cause) == exc_type + assert root_cause == expected_error + + @pytest.mark.parametrize( + "exc_type", + [ + core_exceptions.Cancelled, + core_exceptions.PreconditionFailed, + core_exceptions.NotFound, + core_exceptions.PermissionDenied, + core_exceptions.Conflict, + core_exceptions.InternalServerError, + core_exceptions.TooManyRequests, + core_exceptions.ResourceExhausted, + InvalidChunk, + ], ) - from google.cloud.bigtable.exceptions import IdleTimeout - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - chunks = [_make_chunk(row_key=b"test_1"), _make_chunk(row_key=b"test_2")] - with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream(chunks) - with mock.patch.object( - ReadRowsIterator, "_start_idle_timer" - ) as start_idle_timer: - client = _make_client() + @pytest.mark.asyncio + async def test_read_rows_non_retryable_error(self, exc_type): + async with self._make_client() as client: table = client.get_table("instance", "table") query = ReadRowsQuery() - gen = await table.read_rows_stream(query) - # should start idle timer on creation - start_idle_timer.assert_called_once() - with mock.patch.object(_ReadRowsOperation, "aclose", AsyncMock()) as aclose: - # start idle timer with our own value - await gen._start_idle_timer(0.1) - # should timeout after being abandoned - await gen.__anext__() - await asyncio.sleep(0.2) - # generator should be expired - assert not gen.active - assert type(gen._merger_or_error) == IdleTimeout - assert gen._idle_timeout_task is None - await client.close() - with pytest.raises(IdleTimeout) as e: - await gen.__anext__() - - expected_msg = ( - "Timed out waiting for next Row 
to be consumed. (idle_timeout=0.1s)" - ) - assert e.value.message == expected_msg - aclose.assert_called_once() - aclose.assert_awaited() - - -@pytest.mark.parametrize( - "exc_type", - [ - core_exceptions.Aborted, - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - ], -) -@pytest.mark.asyncio -async def test_read_rows_retryable_error(exc_type): - async with _make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - expected_error = exc_type("mock error") - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except core_exceptions.DeadlineExceeded as e: - retry_exc = e.__cause__ - root_cause = retry_exc.exceptions[0] - assert type(root_cause) == exc_type - assert root_cause == expected_error - - -@pytest.mark.parametrize( - "exc_type", - [ - core_exceptions.Cancelled, - core_exceptions.PreconditionFailed, - core_exceptions.NotFound, - core_exceptions.PermissionDenied, - core_exceptions.Conflict, - core_exceptions.InternalServerError, - core_exceptions.TooManyRequests, - core_exceptions.ResourceExhausted, - InvalidChunk, - ], -) -@pytest.mark.asyncio -async def test_read_rows_non_retryable_error(exc_type): - async with _make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - expected_error = exc_type("mock error") - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except exc_type as e: - assert e == expected_error + expected_error = exc_type("mock error") + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] + ) + try: + await table.read_rows(query, operation_timeout=0.1) + except exc_type as e: + assert e == expected_error + @pytest.mark.asyncio + async def test_read_rows_request_stats(self): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + stats = self._make_stats() + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=stats + ) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats == stats -@pytest.mark.asyncio -async def test_read_rows_request_stats(): - async with _make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] - stats = _make_stats() - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, request_stats=stats - ) - gen = await table.read_rows_stream(query) - [row async for row in gen] - assert gen.request_stats == stats + @pytest.mark.asyncio + async def test_read_rows_request_stats_missing(self): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + 
table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=None + ) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats is None + @pytest.mark.asyncio + async def test_read_rows_revise_request(self): + from google.cloud.bigtable._read_rows import _ReadRowsOperation -@pytest.mark.asyncio -async def test_read_rows_request_stats_missing(): - async with _make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [_make_chunk(row_key=b"test_1")] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, request_stats=None - ) - gen = await table.read_rows_stream(query) - [row async for row in gen] - assert gen.request_stats is None - - -@pytest.mark.asyncio -async def test_read_rows_revise_request(): - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - with mock.patch.object( - _ReadRowsOperation, "_revise_request_rowset" - ) as revise_rowset: - with mock.patch.object(_ReadRowsOperation, "aclose"): - revise_rowset.side_effect = [ - "modified", - core_exceptions.Cancelled("mock error"), - ] - async with _make_client() as client: - table = client.get_table("instance", "table") - row_keys = [b"test_1", b"test_2", b"test_3"] - query = ReadRowsQuery(row_keys=row_keys) - chunks = [ - _make_chunk(row_key=b"test_1"), - core_exceptions.Aborted("mock retryable error"), + with mock.patch.object( + _ReadRowsOperation, "_revise_request_rowset" + ) as revise_rowset: + with mock.patch.object(_ReadRowsOperation, "aclose"): + revise_rowset.side_effect = [ + "modified", + core_exceptions.Cancelled("mock error"), ] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: _make_gapic_stream( - chunks, request_stats=None - ) + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_keys = [b"test_1", b"test_2", b"test_3"] + query = ReadRowsQuery(row_keys=row_keys) + chunks = [ + self._make_chunk(row_key=b"test_1"), + core_exceptions.Aborted("mock retryable error"), + ] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=None + ) + ) + try: + await table.read_rows(query) + except core_exceptions.Cancelled: + revise_rowset.assert_called() + first_call_kwargs = revise_rowset.call_args_list[0].kwargs + assert ( + first_call_kwargs["row_set"] == query._to_dict()["rows"] + ) + assert first_call_kwargs["last_seen_row_key"] == b"test_1" + second_call_kwargs = revise_rowset.call_args_list[1].kwargs + assert second_call_kwargs["row_set"] == "modified" + assert second_call_kwargs["last_seen_row_key"] == b"test_1" + + @pytest.mark.asyncio + async def test_read_rows_default_timeouts(self): + """ + Ensure that the default timeouts are set on the read rows operation when not overridden + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with self._make_client() as client: + async with client.get_table( + "instance", + "table", + 
default_operation_timeout=operation_timeout, + default_per_row_timeout=per_row_timeout, + default_per_request_timeout=per_request_timeout, + ) as table: try: - await table.read_rows(query) - except core_exceptions.Cancelled: - revise_rowset.assert_called() - first_call_kwargs = revise_rowset.call_args_list[0].kwargs - assert first_call_kwargs["row_set"] == query._to_dict()["rows"] - assert first_call_kwargs["last_seen_row_key"] == b"test_1" - second_call_kwargs = revise_rowset.call_args_list[1].kwargs - assert second_call_kwargs["row_set"] == "modified" - assert second_call_kwargs["last_seen_row_key"] == b"test_1" - - -@pytest.mark.asyncio -async def test_read_rows_default_timeouts(): - """ - Ensure that the default timeouts are set on the read rows operation when not overridden - """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - operation_timeout = 8 - per_row_timeout = 2 - per_request_timeout = 4 - with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: - mock_op.side_effect = RuntimeError("mock error") - async with _make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=operation_timeout, - default_per_row_timeout=per_row_timeout, - default_per_request_timeout=per_request_timeout, - ) as table: - try: - await table.read_rows(ReadRowsQuery()) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout - assert kwargs["per_request_timeout"] == per_request_timeout - - -@pytest.mark.asyncio -async def test_read_rows_default_timeout_override(): - """ - When timeouts are passed, they overwrite default values - """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - operation_timeout = 8 - per_row_timeout = 2 - per_request_timeout = 4 - with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: - mock_op.side_effect = RuntimeError("mock error") - async with _make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=99, - default_per_row_timeout=98, - default_per_request_timeout=97, - ) as table: - try: - await table.read_rows( - ReadRowsQuery(), - operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, - per_request_timeout=per_request_timeout, - ) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout - assert kwargs["per_request_timeout"] == per_request_timeout + await table.read_rows(ReadRowsQuery()) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout + + @pytest.mark.asyncio + async def test_read_rows_default_timeout_override(self): + """ + When timeouts are passed, they overwrite default values + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with self._make_client() as client: + async with client.get_table( + "instance", + "table", + default_operation_timeout=99, + default_per_row_timeout=98, + default_per_request_timeout=97, + ) as table: + 
try: + await table.read_rows( + ReadRowsQuery(), + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, + ) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout From ad424367286d14d5f59eb262b5a2db54e408f095 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 14:45:20 -0700 Subject: [PATCH 328/349] moved read rows tests in test_client --- tests/unit/test_client.py | 542 ++++++++++++++++++++++++++- tests/unit/test_client_read_rows.py | 560 ---------------------------- 2 files changed, 540 insertions(+), 562 deletions(-) delete mode 100644 tests/unit/test_client_read_rows.py diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b7e0353b1..d374b34d8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -11,16 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from __future__ import annotations import grpc import asyncio import re import sys -from google.auth.credentials import AnonymousCredentials import pytest +from google.auth.credentials import AnonymousCredentials +from google.cloud.bigtable_v2.types import ReadRowsResponse +from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable_v2.types import RequestStats +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable.exceptions import InvalidChunk + # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -851,3 +857,535 @@ def test_table_ctor_sync(self): with pytest.raises(RuntimeError) as e: Table(client, "instance-id", "table-id") assert e.match("Table must be created within an async event loop context.") + + +class TestReadRows: + """ + Tests for table.read_rows and related methods. 
+ """ + + def _make_client(self, *args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient(*args, **kwargs) + + def _make_stats(self): + from google.cloud.bigtable_v2.types import RequestStats + from google.cloud.bigtable_v2.types import FullReadStatsView + from google.cloud.bigtable_v2.types import ReadIterationStats + + return RequestStats( + full_read_stats_view=FullReadStatsView( + read_iteration_stats=ReadIterationStats( + rows_seen_count=1, + rows_returned_count=2, + cells_seen_count=3, + cells_returned_count=4, + ) + ) + ) + + def _make_chunk(self, *args, **kwargs): + from google.cloud.bigtable_v2 import ReadRowsResponse + + kwargs["row_key"] = kwargs.get("row_key", b"row_key") + kwargs["family_name"] = kwargs.get("family_name", "family_name") + kwargs["qualifier"] = kwargs.get("qualifier", b"qualifier") + kwargs["value"] = kwargs.get("value", b"value") + kwargs["commit_row"] = kwargs.get("commit_row", True) + + return ReadRowsResponse.CellChunk(*args, **kwargs) + + async def _make_gapic_stream( + self, + chunk_list: list[ReadRowsResponse.CellChunk | Exception], + request_stats: RequestStats | None = None, + sleep_time=0, + ): + from google.cloud.bigtable_v2 import ReadRowsResponse + + async def inner(): + for chunk in chunk_list: + if sleep_time: + await asyncio.sleep(sleep_time) + if isinstance(chunk, Exception): + raise chunk + else: + yield ReadRowsResponse(chunks=[chunk]) + if request_stats: + yield ReadRowsResponse(request_stats=request_stats) + + return inner() + + @pytest.mark.asyncio + async def test_read_rows(self): + client = self._make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 2 + assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" + await client.close() + + @pytest.mark.asyncio + async def test_read_rows_stream(self): + client = self._make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] + with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + gen = await table.read_rows_stream(query, operation_timeout=3) + results = [row async for row in gen] + assert len(results) == 2 + assert results[0].row_key == b"test_1" + assert results[1].row_key == b"test_2" + await client.close() + + @pytest.mark.parametrize("include_app_profile", [True, False]) + @pytest.mark.asyncio + async def test_read_rows_query_matches_request(self, include_app_profile): + from google.cloud.bigtable import RowRange + + async with self._make_client() as client: + app_profile_id = "app_profile_id" if include_app_profile else None + table = client.get_table("instance", "table", app_profile_id=app_profile_id) + row_keys = [b"test_1", "test_2"] + row_ranges = RowRange("start", "end") + filter_ = {"test": "filter"} + limit = 99 + query = ReadRowsQuery( + row_keys=row_keys, + row_ranges=row_ranges, + row_filter=filter_, + limit=limit, + ) + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as 
read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [] + ) + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 0 + call_request = read_rows.call_args_list[0][0][0] + query_dict = query._to_dict() + if include_app_profile: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name", + "app_profile_id", + } + else: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name" + } + assert call_request["rows"] == query_dict["rows"] + assert call_request["filter"] == filter_ + assert call_request["rows_limit"] == limit + assert call_request["table_name"] == table.table_name + if include_app_profile: + assert call_request["app_profile_id"] == app_profile_id + + @pytest.mark.parametrize( + "input_buffer_size, expected_buffer_size", + [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], + ) + @pytest.mark.asyncio + async def test_read_rows_buffer_size(self, input_buffer_size, expected_buffer_size): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + with mock.patch.object(asyncio, "Queue") as queue: + queue.side_effect = asyncio.CancelledError + try: + gen = await table.read_rows_stream( + query, operation_timeout=3, buffer_size=input_buffer_size + ) + [row async for row in gen] + except asyncio.CancelledError: + pass + queue.assert_called_once_with(maxsize=expected_buffer_size) + + @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) + @pytest.mark.asyncio + async def test_read_rows_timeout(self, operation_timeout): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=1 + ) + try: + await table.read_rows(query, operation_timeout=operation_timeout) + except core_exceptions.DeadlineExceeded as e: + assert ( + e.message + == f"operation_timeout of {operation_timeout:0.1f}s exceeded" + ) + + @pytest.mark.parametrize( + "per_row_t, operation_t, expected_num", + [ + (0.1, 0.01, 0), + (0.1, 0.19, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.24, 4), + ], + ) + @pytest.mark.asyncio + async def test_read_rows_per_row_timeout( + self, per_row_t, operation_t, expected_num + ): + from google.cloud.bigtable.exceptions import RetryExceptionGroup + + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a, b: 0): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=5 + ) + ) + try: + await table.read_rows( + query, + per_row_timeout=per_row_t, + operation_timeout=operation_t, + ) + except core_exceptions.DeadlineExceeded as deadline_exc: + retry_exc = deadline_exc.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + 
assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert ( + sub_exc.message + == f"per_row_timeout of {per_row_t:0.1f}s exceeded" + ) + + @pytest.mark.parametrize( + "per_request_t, operation_t, expected_num", + [ + (0.05, 0.09, 1), + (0.05, 0.54, 10), + (0.05, 0.14, 2), + (0.05, 0.24, 4), + ], + ) + @pytest.mark.asyncio + async def test_read_rows_per_request_timeout( + self, per_request_t, operation_t, expected_num + ): + from google.cloud.bigtable.exceptions import RetryExceptionGroup + + # mocking uniform ensures there are no sleeps between retries + with mock.patch("random.uniform", side_effect=lambda a, b: 0): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [core_exceptions.DeadlineExceeded("mock deadline")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=per_request_t + ) + ) + try: + await table.read_rows( + query, + operation_timeout=operation_t, + per_request_timeout=per_request_t, + ) + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == "mock deadline" + assert read_rows.call_count == expected_num + 1 + called_kwargs = read_rows.call_args[1] + assert called_kwargs["timeout"] == per_request_t + + @pytest.mark.asyncio + async def test_read_rows_idle_timeout(self): + from google.cloud.bigtable.client import ReadRowsIterator + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) + from google.cloud.bigtable.exceptions import IdleTimeout + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + chunks = [ + self._make_chunk(row_key=b"test_1"), + self._make_chunk(row_key=b"test_2"), + ] + with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks + ) + with mock.patch.object( + ReadRowsIterator, "_start_idle_timer" + ) as start_idle_timer: + client = self._make_client() + table = client.get_table("instance", "table") + query = ReadRowsQuery() + gen = await table.read_rows_stream(query) + # should start idle timer on creation + start_idle_timer.assert_called_once() + with mock.patch.object(_ReadRowsOperation, "aclose", AsyncMock()) as aclose: + # start idle timer with our own value + await gen._start_idle_timer(0.1) + # should timeout after being abandoned + await gen.__anext__() + await asyncio.sleep(0.2) + # generator should be expired + assert not gen.active + assert type(gen._merger_or_error) == IdleTimeout + assert gen._idle_timeout_task is None + await client.close() + with pytest.raises(IdleTimeout) as e: + await gen.__anext__() + + expected_msg = ( + "Timed out waiting for next Row to be consumed. 
(idle_timeout=0.1s)" + ) + assert e.value.message == expected_msg + aclose.assert_called_once() + aclose.assert_awaited() + + @pytest.mark.parametrize( + "exc_type", + [ + core_exceptions.Aborted, + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + ], + ) + @pytest.mark.asyncio + async def test_read_rows_retryable_error(self, exc_type): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + expected_error = exc_type("mock error") + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] + ) + try: + await table.read_rows(query, operation_timeout=0.1) + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + root_cause = retry_exc.exceptions[0] + assert type(root_cause) == exc_type + assert root_cause == expected_error + + @pytest.mark.parametrize( + "exc_type", + [ + core_exceptions.Cancelled, + core_exceptions.PreconditionFailed, + core_exceptions.NotFound, + core_exceptions.PermissionDenied, + core_exceptions.Conflict, + core_exceptions.InternalServerError, + core_exceptions.TooManyRequests, + core_exceptions.ResourceExhausted, + InvalidChunk, + ], + ) + @pytest.mark.asyncio + async def test_read_rows_non_retryable_error(self, exc_type): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + expected_error = exc_type("mock error") + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] + ) + try: + await table.read_rows(query, operation_timeout=0.1) + except exc_type as e: + assert e == expected_error + + @pytest.mark.asyncio + async def test_read_rows_request_stats(self): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + stats = self._make_stats() + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=stats + ) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats == stats + + @pytest.mark.asyncio + async def test_read_rows_request_stats_missing(self): + async with self._make_client() as client: + table = client.get_table("instance", "table") + query = ReadRowsQuery() + chunks = [self._make_chunk(row_key=b"test_1")] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=None + ) + gen = await table.read_rows_stream(query) + [row async for row in gen] + assert gen.request_stats is None + + @pytest.mark.asyncio + async def test_read_rows_revise_request(self): + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + with mock.patch.object( + _ReadRowsOperation, "_revise_request_rowset" + ) as revise_rowset: + with mock.patch.object(_ReadRowsOperation, "aclose"): + revise_rowset.side_effect = [ + "modified", + core_exceptions.Cancelled("mock error"), + ] + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_keys = [b"test_1", b"test_2", b"test_3"] + query = ReadRowsQuery(row_keys=row_keys) + chunks = [ + 
self._make_chunk(row_key=b"test_1"), + core_exceptions.Aborted("mock retryable error"), + ] + with mock.patch.object( + table.client._gapic_client, "read_rows" + ) as read_rows: + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, request_stats=None + ) + ) + try: + await table.read_rows(query) + except core_exceptions.Cancelled: + revise_rowset.assert_called() + first_call_kwargs = revise_rowset.call_args_list[0].kwargs + assert ( + first_call_kwargs["row_set"] == query._to_dict()["rows"] + ) + assert first_call_kwargs["last_seen_row_key"] == b"test_1" + second_call_kwargs = revise_rowset.call_args_list[1].kwargs + assert second_call_kwargs["row_set"] == "modified" + assert second_call_kwargs["last_seen_row_key"] == b"test_1" + + @pytest.mark.asyncio + async def test_read_rows_default_timeouts(self): + """ + Ensure that the default timeouts are set on the read rows operation when not overridden + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with self._make_client() as client: + async with client.get_table( + "instance", + "table", + default_operation_timeout=operation_timeout, + default_per_row_timeout=per_row_timeout, + default_per_request_timeout=per_request_timeout, + ) as table: + try: + await table.read_rows(ReadRowsQuery()) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout + + @pytest.mark.asyncio + async def test_read_rows_default_timeout_override(self): + """ + When timeouts are passed, they overwrite default values + """ + from google.cloud.bigtable._read_rows import _ReadRowsOperation + + operation_timeout = 8 + per_row_timeout = 2 + per_request_timeout = 4 + with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + mock_op.side_effect = RuntimeError("mock error") + async with self._make_client() as client: + async with client.get_table( + "instance", + "table", + default_operation_timeout=99, + default_per_row_timeout=98, + default_per_request_timeout=97, + ) as table: + try: + await table.read_rows( + ReadRowsQuery(), + operation_timeout=operation_timeout, + per_row_timeout=per_row_timeout, + per_request_timeout=per_request_timeout, + ) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_row_timeout"] == per_row_timeout + assert kwargs["per_request_timeout"] == per_request_timeout diff --git a/tests/unit/test_client_read_rows.py b/tests/unit/test_client_read_rows.py deleted file mode 100644 index 70f544503..000000000 --- a/tests/unit/test_client_read_rows.py +++ /dev/null @@ -1,560 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations - -import asyncio - -import pytest - -from google.cloud.bigtable_v2.types import ReadRowsResponse -from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable_v2.types import RequestStats -from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.exceptions import InvalidChunk - -# try/except added for compatibility with python < 3.8 -try: - from unittest import mock - from unittest.mock import AsyncMock # type: ignore -except ImportError: # pragma: NO COVER - import mock # type: ignore - from mock import AsyncMock # type: ignore - - -class TestReadRows: - def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient - - return BigtableDataClient(*args, **kwargs) - - def _make_stats(self): - from google.cloud.bigtable_v2.types import RequestStats - from google.cloud.bigtable_v2.types import FullReadStatsView - from google.cloud.bigtable_v2.types import ReadIterationStats - - return RequestStats( - full_read_stats_view=FullReadStatsView( - read_iteration_stats=ReadIterationStats( - rows_seen_count=1, - rows_returned_count=2, - cells_seen_count=3, - cells_returned_count=4, - ) - ) - ) - - def _make_chunk(self, *args, **kwargs): - from google.cloud.bigtable_v2 import ReadRowsResponse - - kwargs["row_key"] = kwargs.get("row_key", b"row_key") - kwargs["family_name"] = kwargs.get("family_name", "family_name") - kwargs["qualifier"] = kwargs.get("qualifier", b"qualifier") - kwargs["value"] = kwargs.get("value", b"value") - kwargs["commit_row"] = kwargs.get("commit_row", True) - - return ReadRowsResponse.CellChunk(*args, **kwargs) - - async def _make_gapic_stream( - self, - chunk_list: list[ReadRowsResponse.CellChunk | Exception], - request_stats: RequestStats | None = None, - sleep_time=0, - ): - from google.cloud.bigtable_v2 import ReadRowsResponse - - async def inner(): - for chunk in chunk_list: - if sleep_time: - await asyncio.sleep(sleep_time) - if isinstance(chunk, Exception): - raise chunk - else: - yield ReadRowsResponse(chunks=[chunk]) - if request_stats: - yield ReadRowsResponse(request_stats=request_stats) - - return inner() - - @pytest.mark.asyncio - async def test_read_rows(self): - client = self._make_client() - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [ - self._make_chunk(row_key=b"test_1"), - self._make_chunk(row_key=b"test_2"), - ] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks - ) - results = await table.read_rows(query, operation_timeout=3) - assert len(results) == 2 - assert results[0].row_key == b"test_1" - assert results[1].row_key == b"test_2" - await client.close() - - @pytest.mark.asyncio - async def test_read_rows_stream(self): - client = self._make_client() - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [ - self._make_chunk(row_key=b"test_1"), - self._make_chunk(row_key=b"test_2"), - ] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks - ) - gen = await table.read_rows_stream(query, operation_timeout=3) - results = [row async for row in gen] - assert len(results) == 2 - assert results[0].row_key == b"test_1" - assert 
results[1].row_key == b"test_2" - await client.close() - - @pytest.mark.parametrize("include_app_profile", [True, False]) - @pytest.mark.asyncio - async def test_read_rows_query_matches_request(self, include_app_profile): - from google.cloud.bigtable import RowRange - - async with self._make_client() as client: - app_profile_id = "app_profile_id" if include_app_profile else None - table = client.get_table("instance", "table", app_profile_id=app_profile_id) - row_keys = [b"test_1", "test_2"] - row_ranges = RowRange("start", "end") - filter_ = {"test": "filter"} - limit = 99 - query = ReadRowsQuery( - row_keys=row_keys, - row_ranges=row_ranges, - row_filter=filter_, - limit=limit, - ) - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [] - ) - results = await table.read_rows(query, operation_timeout=3) - assert len(results) == 0 - call_request = read_rows.call_args_list[0][0][0] - query_dict = query._to_dict() - if include_app_profile: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name", - "app_profile_id", - } - else: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name" - } - assert call_request["rows"] == query_dict["rows"] - assert call_request["filter"] == filter_ - assert call_request["rows_limit"] == limit - assert call_request["table_name"] == table.table_name - if include_app_profile: - assert call_request["app_profile_id"] == app_profile_id - - @pytest.mark.parametrize( - "input_buffer_size, expected_buffer_size", - [(-100, 0), (-1, 0), (0, 0), (1, 1), (2, 2), (100, 100), (101, 101)], - ) - @pytest.mark.asyncio - async def test_read_rows_buffer_size(self, input_buffer_size, expected_buffer_size): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks - ) - with mock.patch.object(asyncio, "Queue") as queue: - queue.side_effect = asyncio.CancelledError - try: - gen = await table.read_rows_stream( - query, operation_timeout=3, buffer_size=input_buffer_size - ) - [row async for row in gen] - except asyncio.CancelledError: - pass - queue.assert_called_once_with(maxsize=expected_buffer_size) - - @pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) - @pytest.mark.asyncio - async def test_read_rows_timeout(self, operation_timeout): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=1 - ) - try: - await table.read_rows(query, operation_timeout=operation_timeout) - except core_exceptions.DeadlineExceeded as e: - assert ( - e.message - == f"operation_timeout of {operation_timeout:0.1f}s exceeded" - ) - - @pytest.mark.parametrize( - "per_row_t, operation_t, expected_num", - [ - (0.1, 0.01, 0), - (0.1, 0.19, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), - ], - ) - @pytest.mark.asyncio - async def test_read_rows_per_row_timeout( - self, per_row_t, operation_t, expected_num - ): - from google.cloud.bigtable.exceptions import 
RetryExceptionGroup - - # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=5 - ) - ) - try: - await table.read_rows( - query, - per_row_timeout=per_row_t, - operation_timeout=operation_t, - ) - except core_exceptions.DeadlineExceeded as deadline_exc: - retry_exc = deadline_exc.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert ( - sub_exc.message - == f"per_row_timeout of {per_row_t:0.1f}s exceeded" - ) - - @pytest.mark.parametrize( - "per_request_t, operation_t, expected_num", - [ - (0.05, 0.09, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), - ], - ) - @pytest.mark.asyncio - async def test_read_rows_per_request_timeout( - self, per_request_t, operation_t, expected_num - ): - from google.cloud.bigtable.exceptions import RetryExceptionGroup - - # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [core_exceptions.DeadlineExceeded("mock deadline")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=per_request_t - ) - ) - try: - await table.read_rows( - query, - operation_timeout=operation_t, - per_request_timeout=per_request_t, - ) - except core_exceptions.DeadlineExceeded as e: - retry_exc = e.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert sub_exc.message == "mock deadline" - assert read_rows.call_count == expected_num + 1 - called_kwargs = read_rows.call_args[1] - assert called_kwargs["timeout"] == per_request_t - - @pytest.mark.asyncio - async def test_read_rows_idle_timeout(self): - from google.cloud.bigtable.client import ReadRowsIterator - from google.cloud.bigtable_v2.services.bigtable.async_client import ( - BigtableAsyncClient, - ) - from google.cloud.bigtable.exceptions import IdleTimeout - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - chunks = [ - self._make_chunk(row_key=b"test_1"), - self._make_chunk(row_key=b"test_2"), - ] - with mock.patch.object(BigtableAsyncClient, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks - ) - with mock.patch.object( - ReadRowsIterator, "_start_idle_timer" - ) as start_idle_timer: - client = self._make_client() - table = client.get_table("instance", "table") - query = ReadRowsQuery() - gen = await table.read_rows_stream(query) - # should start idle timer on creation - start_idle_timer.assert_called_once() - with mock.patch.object(_ReadRowsOperation, 
"aclose", AsyncMock()) as aclose: - # start idle timer with our own value - await gen._start_idle_timer(0.1) - # should timeout after being abandoned - await gen.__anext__() - await asyncio.sleep(0.2) - # generator should be expired - assert not gen.active - assert type(gen._merger_or_error) == IdleTimeout - assert gen._idle_timeout_task is None - await client.close() - with pytest.raises(IdleTimeout) as e: - await gen.__anext__() - - expected_msg = ( - "Timed out waiting for next Row to be consumed. (idle_timeout=0.1s)" - ) - assert e.value.message == expected_msg - aclose.assert_called_once() - aclose.assert_awaited() - - @pytest.mark.parametrize( - "exc_type", - [ - core_exceptions.Aborted, - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - ], - ) - @pytest.mark.asyncio - async def test_read_rows_retryable_error(self, exc_type): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - expected_error = exc_type("mock error") - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except core_exceptions.DeadlineExceeded as e: - retry_exc = e.__cause__ - root_cause = retry_exc.exceptions[0] - assert type(root_cause) == exc_type - assert root_cause == expected_error - - @pytest.mark.parametrize( - "exc_type", - [ - core_exceptions.Cancelled, - core_exceptions.PreconditionFailed, - core_exceptions.NotFound, - core_exceptions.PermissionDenied, - core_exceptions.Conflict, - core_exceptions.InternalServerError, - core_exceptions.TooManyRequests, - core_exceptions.ResourceExhausted, - InvalidChunk, - ], - ) - @pytest.mark.asyncio - async def test_read_rows_non_retryable_error(self, exc_type): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - expected_error = exc_type("mock error") - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except exc_type as e: - assert e == expected_error - - @pytest.mark.asyncio - async def test_read_rows_request_stats(self): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - stats = self._make_stats() - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks, request_stats=stats - ) - gen = await table.read_rows_stream(query) - [row async for row in gen] - assert gen.request_stats == stats - - @pytest.mark.asyncio - async def test_read_rows_request_stats_missing(self): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks, request_stats=None - ) - gen = await table.read_rows_stream(query) - [row async for row in gen] - assert gen.request_stats is None - - @pytest.mark.asyncio - async def test_read_rows_revise_request(self): - from 
google.cloud.bigtable._read_rows import _ReadRowsOperation - - with mock.patch.object( - _ReadRowsOperation, "_revise_request_rowset" - ) as revise_rowset: - with mock.patch.object(_ReadRowsOperation, "aclose"): - revise_rowset.side_effect = [ - "modified", - core_exceptions.Cancelled("mock error"), - ] - async with self._make_client() as client: - table = client.get_table("instance", "table") - row_keys = [b"test_1", b"test_2", b"test_3"] - query = ReadRowsQuery(row_keys=row_keys) - chunks = [ - self._make_chunk(row_key=b"test_1"), - core_exceptions.Aborted("mock retryable error"), - ] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, request_stats=None - ) - ) - try: - await table.read_rows(query) - except core_exceptions.Cancelled: - revise_rowset.assert_called() - first_call_kwargs = revise_rowset.call_args_list[0].kwargs - assert ( - first_call_kwargs["row_set"] == query._to_dict()["rows"] - ) - assert first_call_kwargs["last_seen_row_key"] == b"test_1" - second_call_kwargs = revise_rowset.call_args_list[1].kwargs - assert second_call_kwargs["row_set"] == "modified" - assert second_call_kwargs["last_seen_row_key"] == b"test_1" - - @pytest.mark.asyncio - async def test_read_rows_default_timeouts(self): - """ - Ensure that the default timeouts are set on the read rows operation when not overridden - """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - operation_timeout = 8 - per_row_timeout = 2 - per_request_timeout = 4 - with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: - mock_op.side_effect = RuntimeError("mock error") - async with self._make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=operation_timeout, - default_per_row_timeout=per_row_timeout, - default_per_request_timeout=per_request_timeout, - ) as table: - try: - await table.read_rows(ReadRowsQuery()) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout - assert kwargs["per_request_timeout"] == per_request_timeout - - @pytest.mark.asyncio - async def test_read_rows_default_timeout_override(self): - """ - When timeouts are passed, they overwrite default values - """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - - operation_timeout = 8 - per_row_timeout = 2 - per_request_timeout = 4 - with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: - mock_op.side_effect = RuntimeError("mock error") - async with self._make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=99, - default_per_row_timeout=98, - default_per_request_timeout=97, - ) as table: - try: - await table.read_rows( - ReadRowsQuery(), - operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, - per_request_timeout=per_request_timeout, - ) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout - assert kwargs["per_request_timeout"] == per_request_timeout From 7606e3a6244c8a569e2b6a32399cea9eecc654cd Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 17:00:48 -0700 Subject: [PATCH 329/349] update submodules in nox --- noxfile.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) 
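The commit below pastes the same submodule-update block into both install_unittest_dependencies and install_systemtest_dependencies. As a minimal sketch only (the helper name _update_submodules is hypothetical and not part of the patch), the duplicated logic could be hoisted into a single function and called from both installers:

import subprocess


def _update_submodules():
    # Fetch/initialize git submodules before installing local, editable dependencies.
    # Mirrors the patch's behavior: if git is not installed, subprocess raises
    # OSError and the step is skipped silently.
    try:
        subprocess.call(["git", "submodule", "update", "--init", "--recursive"])
    except OSError:
        pass

Either installer could then call _update_submodules() right before its session.install("-e", ...) line; the patch as written simply inlines the block twice.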
diff --git a/noxfile.py b/noxfile.py index ebce695d0..02e694727 100644 --- a/noxfile.py +++ b/noxfile.py @@ -166,6 +166,13 @@ def install_unittest_dependencies(session, *constraints): session.install(*UNIT_TEST_EXTERNAL_DEPENDENCIES, *constraints) if UNIT_TEST_LOCAL_DEPENDENCIES: + # update submodules if needed + import subprocess + try: + subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) + except OSError: + # git is not installed + pass session.install("-e", *UNIT_TEST_LOCAL_DEPENDENCIES, *constraints) if UNIT_TEST_EXTRAS_BY_PYTHON: @@ -224,6 +231,13 @@ def install_systemtest_dependencies(session, *constraints): session.install(*SYSTEM_TEST_EXTERNAL_DEPENDENCIES, *constraints) if SYSTEM_TEST_LOCAL_DEPENDENCIES: + # update submodules if needed + import subprocess + try: + subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) + except OSError: + # git is not installed + pass session.install("-e", *SYSTEM_TEST_LOCAL_DEPENDENCIES, *constraints) if SYSTEM_TEST_DEPENDENCIES: From 829e68fd5e3e5613325a01164b9c00191d40dd5d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 21 Apr 2023 17:02:03 -0700 Subject: [PATCH 330/349] ran black --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 02e694727..b9f5f2389 100644 --- a/noxfile.py +++ b/noxfile.py @@ -168,6 +168,7 @@ def install_unittest_dependencies(session, *constraints): if UNIT_TEST_LOCAL_DEPENDENCIES: # update submodules if needed import subprocess + try: subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) except OSError: @@ -233,6 +234,7 @@ def install_systemtest_dependencies(session, *constraints): if SYSTEM_TEST_LOCAL_DEPENDENCIES: # update submodules if needed import subprocess + try: subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) except OSError: From 6a58e860c8f443428782c56253665aa1e3ed0e0f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 10:11:16 -0700 Subject: [PATCH 331/349] removed submodule update --- noxfile.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/noxfile.py b/noxfile.py index b9f5f2389..ebce695d0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -166,14 +166,6 @@ def install_unittest_dependencies(session, *constraints): session.install(*UNIT_TEST_EXTERNAL_DEPENDENCIES, *constraints) if UNIT_TEST_LOCAL_DEPENDENCIES: - # update submodules if needed - import subprocess - - try: - subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) - except OSError: - # git is not installed - pass session.install("-e", *UNIT_TEST_LOCAL_DEPENDENCIES, *constraints) if UNIT_TEST_EXTRAS_BY_PYTHON: @@ -232,14 +224,6 @@ def install_systemtest_dependencies(session, *constraints): session.install(*SYSTEM_TEST_EXTERNAL_DEPENDENCIES, *constraints) if SYSTEM_TEST_LOCAL_DEPENDENCIES: - # update submodules if needed - import subprocess - - try: - subprocess.call(["git", "submodule", "update", "--init", "--recursive"]) - except OSError: - # git is not installed - pass session.install("-e", *SYSTEM_TEST_LOCAL_DEPENDENCIES, *constraints) if SYSTEM_TEST_DEPENDENCIES: From 9be5b07f6862ccd2fbdc36c62536685bb67b70d8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 10:37:17 -0700 Subject: [PATCH 332/349] removed unneeded import --- google/cloud/bigtable/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 186f7e309..e70cfe722 100644 --- a/google/cloud/bigtable/client.py +++ 
b/google/cloud/bigtable/client.py @@ -19,7 +19,6 @@ cast, Any, Optional, - AsyncIterable, Set, TYPE_CHECKING, ) From 83ffe315c7f313452ca2f49ac549bb8eee99179c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 12:44:26 -0700 Subject: [PATCH 333/349] added submodule update to trampoline.sh --- .kokoro/trampoline.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index f39236e94..962311a1a 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -24,5 +24,7 @@ function cleanup() { } trap cleanup EXIT +git submodule update --init --recursive + $(dirname $0)/populate-secrets.sh # Secret Manager secrets. -python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" \ No newline at end of file +python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" From 2de0c5e32fe35bbc69cd34841c48a29b23b1a190 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 12:48:41 -0700 Subject: [PATCH 334/349] reverted submodule update --- .kokoro/trampoline.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index 962311a1a..a4241db23 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -24,7 +24,5 @@ function cleanup() { } trap cleanup EXIT -git submodule update --init --recursive - $(dirname $0)/populate-secrets.sh # Secret Manager secrets. python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" From 3d597dd68b44a2947621919f18198d9085044be0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 14:25:26 -0700 Subject: [PATCH 335/349] added api-core fork to external dependencies --- noxfile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index ebce695d0..4bad18957 100644 --- a/noxfile.py +++ b/noxfile.py @@ -39,8 +39,8 @@ "pytest-cov", "pytest-asyncio", ] -UNIT_TEST_EXTERNAL_DEPENDENCIES = [] -UNIT_TEST_LOCAL_DEPENDENCIES = ["python-api-core"] +UNIT_TEST_EXTERNAL_DEPENDENCIES = ["git+https://github.com/googleapis/python-api-core.git@retry_generators"] +UNIT_TEST_LOCAL_DEPENDENCIES = [] UNIT_TEST_DEPENDENCIES = [] UNIT_TEST_EXTRAS = [] UNIT_TEST_EXTRAS_BY_PYTHON = {} @@ -52,8 +52,9 @@ "pytest-asyncio", "google-cloud-testutils", ] -SYSTEM_TEST_EXTERNAL_DEPENDENCIES = [] -SYSTEM_TEST_LOCAL_DEPENDENCIES = ["python-api-core"] +SYSTEM_TEST_EXTERNAL_DEPENDENCIES = ["git+https://github.com/googleapis/python-api-core.git@retry_generators"] +SYSTEM_TEST_LOCAL_DEPENDENCIES = [] +UNIT_TEST_DEPENDENCIES = [] SYSTEM_TEST_DEPENDENCIES = [] SYSTEM_TEST_EXTRAS = [] SYSTEM_TEST_EXTRAS_BY_PYTHON = {} From a7d6d253a6dcab571e897c439a1275f1f71b23e6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 14:41:48 -0700 Subject: [PATCH 336/349] changed pinned api-core version --- noxfile.py | 8 ++++++-- testing/constraints-3.7.txt | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index 4bad18957..e67f21bdc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -39,7 +39,9 @@ "pytest-cov", "pytest-asyncio", ] -UNIT_TEST_EXTERNAL_DEPENDENCIES = ["git+https://github.com/googleapis/python-api-core.git@retry_generators"] +UNIT_TEST_EXTERNAL_DEPENDENCIES = [ + "git+https://github.com/googleapis/python-api-core.git@retry_generators" +] UNIT_TEST_LOCAL_DEPENDENCIES = [] UNIT_TEST_DEPENDENCIES = [] UNIT_TEST_EXTRAS = [] @@ -52,7 +54,9 @@ "pytest-asyncio", "google-cloud-testutils", ] -SYSTEM_TEST_EXTERNAL_DEPENDENCIES = ["git+https://github.com/googleapis/python-api-core.git@retry_generators"] 
+SYSTEM_TEST_EXTERNAL_DEPENDENCIES = [ + "git+https://github.com/googleapis/python-api-core.git@retry_generators" +] SYSTEM_TEST_LOCAL_DEPENDENCIES = [] UNIT_TEST_DEPENDENCIES = [] SYSTEM_TEST_DEPENDENCIES = [] diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index d14da7c0c..13ed9eac9 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -google-api-core==1.34.0 +google-api-core==2.11.0 google-cloud-core==1.4.1 grpc-google-iam-v1==0.12.4 proto-plus==1.22.0 From 55ca37f576b495035465a19221b534255c2dd415 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 15:09:35 -0700 Subject: [PATCH 337/349] brought in shared deadline logic from mutate_rows --- google/cloud/bigtable/_read_rows.py | 2 +- google/cloud/bigtable/exceptions.py | 38 +++++++++++++++++++++++++++++ google/cloud/bigtable/iterators.py | 23 +++++------------ tests/unit/test__read_rows.py | 17 +++++++------ 4 files changed, 54 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 3dab23de3..1b669efd1 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -69,9 +69,9 @@ def __init__( self, request: dict[str, Any], client: BigtableAsyncClient, + operation_timeout: float, *, buffer_size: int = 0, - operation_timeout: float | None = None, per_row_timeout: float | None = None, per_request_timeout: float | None = None, ): diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 8ab4642c2..5c5e50ef3 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -12,14 +12,52 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations import sys +from typing import Callable, Any + from google.api_core import exceptions as core_exceptions is_311_plus = sys.version_info >= (3, 11) +def _convert_retry_deadline( + func: Callable[..., Any], + timeout_value: float, + retry_errors: list[Exception] | None = None, +): + """ + Decorator to convert RetryErrors raised by api_core.retry into + DeadlineExceeded exceptions, indicating that the underlying retries have + exhaused the timeout value. + Optionally attaches a RetryExceptionGroup to the DeadlineExceeded.__cause__, + detailing the failed exceptions associated with each retry. 
+ Args: + - func: The function to decorate + - timeout_value: The timeout value to display in the DeadlineExceeded error message + - retry_errors: An optional list of exceptions to attach as a RetryExceptionGroup to the DeadlineExceeded.__cause__ + """ + + async def wrapper(*args, **kwargs): + try: + return await func(*args, **kwargs) + except core_exceptions.RetryError: + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {timeout_value:0.1f}s exceeded" + ) + source_exc = None + if retry_errors: + source_exc = RetryExceptionGroup( + f"{len(retry_errors)} failed attempts", retry_errors + ) + new_exc.__cause__ = source_exc + raise new_exc from source_exc + + return wrapper + + class IdleTimeout(core_exceptions.DeadlineExceeded): """ Exception raised by ReadRowsIterator when the generator diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 24241e094..bb4070393 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -24,9 +24,8 @@ from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable_v2.types import RequestStats -from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.exceptions import RetryExceptionGroup from google.cloud.bigtable.exceptions import IdleTimeout +from google.cloud.bigtable.exceptions import _convert_retry_deadline from google.cloud.bigtable.row import Row @@ -103,26 +102,16 @@ async def __anext__(self) -> Row: merger = cast(_ReadRowsOperation, self._merger_or_error) try: self.last_interaction_time = time.time() - next_item = await merger.__anext__() + # convert RetryErrors into DeadlineExceeded while calling anext() + deadline_wrapped_next = _convert_retry_deadline( + merger.__anext__, merger.operation_timeout, merger.transient_errors + ) + next_item = await deadline_wrapped_next() if isinstance(next_item, RequestStats): self.request_stats = next_item return await self.__anext__() else: return next_item - except core_exceptions.RetryError: - # raised by AsyncRetry after operation deadline exceeded - new_exc = core_exceptions.DeadlineExceeded( - f"operation_timeout of {merger.operation_timeout:0.1f}s exceeded" - ) - source_exc = None - if merger.transient_errors: - source_exc = RetryExceptionGroup( - f"{len(merger.transient_errors)} failed attempts", - merger.transient_errors, - ) - new_exc.__cause__ = source_exc - await self._finish_with_error(new_exc) - raise new_exc from source_exc except Exception as e: await self._finish_with_error(e) raise e diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index c958d691f..82162febf 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -34,10 +34,11 @@ def test_ctor_defaults(self): client = mock.Mock() client.read_rows = mock.Mock() client.read_rows.return_value = None - instance = self._make_one(request, client) + instance = self._make_one(request, client, 10) assert instance.transient_errors == [] assert instance._last_seen_row_key is None assert instance._emit_count == 0 + assert instance.operation_timeout == 10 retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows @@ -82,7 +83,7 @@ def test___aiter__(self): request = {} client = mock.Mock() client.read_rows = mock.Mock() - instance = self._make_one(request, client) + instance = self._make_one(request, client, 10) assert instance.__aiter__() is instance @pytest.mark.asyncio 
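The _convert_retry_deadline wrapper introduced above surfaces retry exhaustion as a DeadlineExceeded whose __cause__ carries a RetryExceptionGroup of the individual transient failures. A hedged consumption sketch follows (table and query are placeholders; only attributes already exercised by the tests in this series — __cause__ and .exceptions — are used):

from google.api_core import exceptions as core_exceptions


async def read_with_diagnostics(table, query):
    try:
        return await table.read_rows(query, operation_timeout=5)
    except core_exceptions.DeadlineExceeded as exc:
        # The retry wrapper chains per-attempt failures as a RetryExceptionGroup,
        # or leaves __cause__ as None if no attempt had failed yet.
        group = exc.__cause__
        if group is not None:
            for attempt_error in group.exceptions:
                print(f"retry attempt failed: {attempt_error!r}")
        raise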
@@ -94,7 +95,7 @@ async def test_transient_error_capture(self): test_exc = core_exceptions.Aborted("test") test_exc2 = core_exceptions.DeadlineExceeded("test") client.read_rows.side_effect = [test_exc, test_exc2] - instance = self._make_one({}, client) + instance = self._make_one({}, client, 10) with pytest.raises(RuntimeError): await instance.__anext__() assert len(instance.transient_errors) == 2 @@ -206,7 +207,7 @@ def test_revise_to_empty_rowset(self): @pytest.mark.asyncio async def test_revise_limit(self, start_limit, emit_num, expected_limit): request = {"rows_limit": start_limit} - instance = self._make_one(request, mock.Mock()) + instance = self._make_one(request, mock.Mock(), 10) instance._emit_count = emit_num instance._last_seen_row_key = "a" gapic_mock = mock.Mock() @@ -312,7 +313,7 @@ async def test_generator(): async def test_aclose(self): import asyncio - instance = self._make_one({}, mock.Mock()) + instance = self._make_one({}, mock.Mock(), 10) await instance.aclose() assert instance._stream is None assert instance._last_seen_row_key is None @@ -329,7 +330,7 @@ async def test_retryable_attempt_hit_limit(self, limit): """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - instance = self._make_one({}, mock.Mock()) + instance = self._make_one({}, mock.Mock(), 10) async def mock_gapic(*args, **kwargs): # continuously return a single row @@ -371,7 +372,7 @@ async def mock_stream(): _ReadRowsOperation, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() - instance = self._make_one({}, mock.AsyncMock()) + instance = self._make_one({}, mock.AsyncMock(), 10) first_row = await instance.__anext__() assert first_row.row_key == b"dup_key" second_row = await instance.__anext__() @@ -395,7 +396,7 @@ async def mock_stream(): _ReadRowsOperation, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() - instance = self._make_one({}, mock.AsyncMock()) + instance = self._make_one({}, mock.AsyncMock(), 10) first_row = await instance.__anext__() assert first_row.row_key == b"key1" second_row = await instance.__anext__() From fd27aa51f71f99747422f1ee2c87faaa706a810d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 12 May 2023 14:13:17 -0700 Subject: [PATCH 338/349] merged in read_rows performance optimizations --- google/cloud/bigtable/_read_rows.py | 107 +++++++------- google/cloud/bigtable/client.py | 27 +--- google/cloud/bigtable/iterators.py | 57 ++++---- google/cloud/bigtable/mutations.py | 13 +- google/cloud/bigtable/mutations_batcher.py | 6 +- google/cloud/bigtable/read_rows_query.py | 3 +- google/cloud/bigtable/row.py | 160 +++++++++++---------- python-api-core | 2 +- tests/unit/test__read_rows.py | 147 ++++++++++--------- tests/unit/test_client.py | 82 ++--------- tests/unit/test_iterators.py | 4 +- tests/unit/test_read_rows_acceptance.py | 4 +- tests/unit/test_row.py | 15 +- 13 files changed, 277 insertions(+), 350 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 1b669efd1..98afd505d 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -24,7 +24,6 @@ from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions -from abc import ABC, abstractmethod from typing import ( List, @@ -34,6 +33,7 @@ AsyncGenerator, Callable, Awaitable, + Type, ) """ @@ -69,10 +69,9 @@ def __init__( self, request: dict[str, Any], client: BigtableAsyncClient, - 
operation_timeout: float, *, buffer_size: int = 0, - per_row_timeout: float | None = None, + operation_timeout: float | None = None, per_request_timeout: float | None = None, ): """ @@ -81,7 +80,6 @@ def __init__( - client: the Bigtable client to use to make the request - buffer_size: the size of the buffer to use for caching rows from the network - operation_timeout: the timeout to use for the entire operation, in seconds - - per_row_timeout: the timeout to use when waiting for each individual row, in seconds - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds """ self._last_seen_row_key: bytes | None = None @@ -95,7 +93,6 @@ def __init__( self._read_rows_retryable_attempt, client.read_rows, buffer_size, - per_row_timeout, per_request_timeout, row_limit, ) @@ -177,7 +174,6 @@ async def _read_rows_retryable_attempt( self, gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], buffer_size: int, - per_row_timeout: float | None, per_request_timeout: float | None, total_row_limit: int, ) -> AsyncGenerator[Row | RequestStats, None]: @@ -207,9 +203,15 @@ async def _read_rows_retryable_attempt( return else: self._request["rows_limit"] = new_limit + params_str = f'table_name={self._request.get("table_name", "")}' + if self._request.get("app_profile_id", None): + params_str = ( + f'{params_str},app_profile_id={self._request.get("app_profile_id", "")}' + ) new_gapic_stream = await gapic_fn( self._request, timeout=per_request_timeout, + metadata=[("x-goog-request-params", params_str)], ) buffer: asyncio.Queue[Row | RequestStats | Exception] = asyncio.Queue( maxsize=buffer_size @@ -225,13 +227,9 @@ async def _read_rows_retryable_attempt( ) # run until we get a timeout or the stream is exhausted while True: - new_item = await asyncio.wait_for( - stream.__anext__(), timeout=per_row_timeout - ) - if isinstance(new_item, RequestStats): - yield new_item + new_item = await stream.__anext__() # ignore rows that have already been emitted - elif isinstance(new_item, Row) and ( + if isinstance(new_item, Row) and ( self._last_seen_row_key is None or new_item.row_key > self._last_seen_row_key ): @@ -243,11 +241,8 @@ async def _read_rows_retryable_attempt( self._emit_count += 1 if total_row_limit and self._emit_count >= total_row_limit: return - except asyncio.TimeoutError: - # per_row_timeout from asyncio.wait_for - raise core_exceptions.DeadlineExceeded( - f"per_row_timeout of {per_row_timeout:0.1f}s exceeded" - ) + elif isinstance(new_item, RequestStats): + yield new_item except StopAsyncIteration: # end of stream return @@ -380,7 +375,7 @@ def _reset_row(self) -> None: """ Drops the current row and transitions to AWAITING_NEW_ROW to start a fresh one """ - self.current_state: _State = AWAITING_NEW_ROW(self) + self.current_state: Type[_State] = AWAITING_NEW_ROW self.current_family: str | None = None self.current_qualifier: bytes | None = None self.adapter.reset() @@ -392,7 +387,7 @@ def is_terminal_state(self) -> bool: At the end of the read_rows stream, if the state machine is not in a terminal state, an exception should be raised """ - return isinstance(self.current_state, AWAITING_NEW_ROW) + return self.current_state == AWAITING_NEW_ROW def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: """ @@ -402,7 +397,7 @@ def handle_last_scanned_row(self, last_scanned_row_key: bytes) -> Row: """ if self.last_seen_row_key and self.last_seen_row_key >= last_scanned_row_key: raise InvalidChunk("Last scanned row key is out of order") - if not 
isinstance(self.current_state, AWAITING_NEW_ROW): + if not self.current_state == AWAITING_NEW_ROW: raise InvalidChunk("Last scanned row key received in invalid state") scan_marker = _LastScannedRow(last_scanned_row_key) self._handle_complete_row(scan_marker) @@ -426,10 +421,10 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> Row | None: return None # process the chunk and update the state - self.current_state = self.current_state.handle_chunk(chunk) + self.current_state = self.current_state.handle_chunk(self, chunk) if chunk.commit_row: # check if row is complete, and return it if so - if not isinstance(self.current_state, AWAITING_NEW_CELL): + if not self.current_state == AWAITING_NEW_CELL: raise InvalidChunk("Commit chunk received in invalid state") complete_row = self.adapter.finish_row() self._handle_complete_row(complete_row) @@ -455,7 +450,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): Called by StateMachine when a reset_row flag is set on a chunk """ # ensure reset chunk matches expectations - if isinstance(self.current_state, AWAITING_NEW_ROW): + if self.current_state == AWAITING_NEW_ROW: raise InvalidChunk("Reset chunk received when not processing row") if chunk.row_key: raise InvalidChunk("Reset chunk has a row key") @@ -472,7 +467,7 @@ def _handle_reset_chunk(self, chunk: ReadRowsResponse.CellChunk): self._reset_row() -class _State(ABC): +class _State: """ Represents a state the state machine can be in @@ -480,13 +475,8 @@ class _State(ABC): transitioning to the next state """ - __slots__ = ("_owner",) - - def __init__(self, owner: _StateMachine): - self._owner = owner - - @abstractmethod - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": + @staticmethod + def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> "_State": raise NotImplementedError @@ -498,13 +488,14 @@ class AWAITING_NEW_ROW(_State): - AWAITING_NEW_CELL: when a chunk with a row_key is received """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": + @staticmethod + def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: if not chunk.row_key: raise InvalidChunk("New row is missing a row key") - self._owner.adapter.start_row(chunk.row_key) + owner.adapter.start_row(chunk.row_key) # the first chunk signals both the start of a new row and the start of a new cell, so # force the chunk processing in the AWAITING_CELL_VALUE. - return AWAITING_NEW_CELL(self._owner).handle_chunk(chunk) + return AWAITING_NEW_CELL.handle_chunk(owner, chunk) class AWAITING_NEW_CELL(_State): @@ -516,42 +507,45 @@ class AWAITING_NEW_CELL(_State): - AWAITING_CELL_VALUE: when the value is split across multiple chunks """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": + @staticmethod + def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: is_split = chunk.value_size > 0 # track latest cell data. 
New chunks won't send repeated data - if _chunk_has_field(chunk, "family_name"): - self._owner.current_family = chunk.family_name.value - if not _chunk_has_field(chunk, "qualifier"): + has_family = _chunk_has_field(chunk, "family_name") + has_qualifier = _chunk_has_field(chunk, "qualifier") + if has_family: + owner.current_family = chunk.family_name.value + if not has_qualifier: raise InvalidChunk("New family must specify qualifier") - if _chunk_has_field(chunk, "qualifier"): - self._owner.current_qualifier = chunk.qualifier.value - if self._owner.current_family is None: + if has_qualifier: + owner.current_qualifier = chunk.qualifier.value + if owner.current_family is None: raise InvalidChunk("Family not found") # ensure that all chunks after the first one are either missing a row # key or the row is the same - if chunk.row_key and chunk.row_key != self._owner.adapter.current_key: + if chunk.row_key and chunk.row_key != owner.adapter.current_key: raise InvalidChunk("Row key changed mid row") - if self._owner.current_family is None: + if owner.current_family is None: raise InvalidChunk("Missing family for new cell") - if self._owner.current_qualifier is None: + if owner.current_qualifier is None: raise InvalidChunk("Missing qualifier for new cell") - self._owner.adapter.start_cell( - family=self._owner.current_family, - qualifier=self._owner.current_qualifier, + owner.adapter.start_cell( + family=owner.current_family, + qualifier=owner.current_qualifier, labels=list(chunk.labels), timestamp_micros=chunk.timestamp_micros, ) - self._owner.adapter.cell_value(chunk.value) + owner.adapter.cell_value(chunk.value) # transition to new state if is_split: - return AWAITING_CELL_VALUE(self._owner) + return AWAITING_CELL_VALUE else: # cell is complete - self._owner.adapter.finish_cell() - return AWAITING_NEW_CELL(self._owner) + owner.adapter.finish_cell() + return AWAITING_NEW_CELL class AWAITING_CELL_VALUE(_State): @@ -563,7 +557,8 @@ class AWAITING_CELL_VALUE(_State): - AWAITING_CELL_VALUE: when additional value chunks are required """ - def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": + @staticmethod + def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("In progress cell had a row key") @@ -576,14 +571,14 @@ def handle_chunk(self, chunk: ReadRowsResponse.CellChunk) -> "_State": if chunk.labels: raise InvalidChunk("In progress cell had labels") is_last = chunk.value_size == 0 - self._owner.adapter.cell_value(chunk.value) + owner.adapter.cell_value(chunk.value) # transition to new state if not is_last: - return AWAITING_CELL_VALUE(self._owner) + return AWAITING_CELL_VALUE else: # cell is complete - self._owner.adapter.finish_cell() - return AWAITING_NEW_CELL(self._owner) + owner.adapter.finish_cell() + return AWAITING_NEW_CELL class _RowBuilder: diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e70cfe722..275bfa4a3 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -293,7 +293,6 @@ def get_table( table_id: str, app_profile_id: str | None = None, default_operation_timeout: float = 60, - default_per_row_timeout: float | None = 10, default_per_request_timeout: float | None = None, ) -> Table: """ @@ -313,7 +312,6 @@ def get_table( table_id, app_profile_id, default_operation_timeout=default_operation_timeout, - default_per_row_timeout=default_per_row_timeout, 
default_per_request_timeout=default_per_request_timeout, ) @@ -342,7 +340,6 @@ def __init__( app_profile_id: str | None = None, *, default_operation_timeout: float = 60, - default_per_row_timeout: float | None = 10, default_per_request_timeout: float | None = None, ): """ @@ -359,8 +356,6 @@ def __init__( app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles default_operation_timeout: (Optional) The default timeout, in seconds - default_per_row_timeout: (Optional) The default timeout for individual - rows in all read_rows requests, in seconds default_per_request_timeout: (Optional) The default timeout for individual rpc requests, in seconds Raises: @@ -369,8 +364,6 @@ def __init__( # validate timeouts if default_operation_timeout <= 0: raise ValueError("default_operation_timeout must be greater than 0") - if default_per_row_timeout is not None and default_per_row_timeout <= 0: - raise ValueError("default_per_row_timeout must be greater than 0") if default_per_request_timeout is not None and default_per_request_timeout <= 0: raise ValueError("default_per_request_timeout must be greater than 0") if ( @@ -392,7 +385,6 @@ def __init__( self.app_profile_id = app_profile_id self.default_operation_timeout = default_operation_timeout - self.default_per_row_timeout = default_per_row_timeout self.default_per_request_timeout = default_per_request_timeout # raises RuntimeError if called outside of an async context (no running event loop) @@ -411,7 +403,6 @@ async def read_rows_stream( *, buffer_size: int = 0, operation_timeout: float | None = None, - per_row_timeout: float | None = None, per_request_timeout: float | None = None, ) -> ReadRowsIterator: """ @@ -434,11 +425,6 @@ async def read_rows_stream( Completed and bufferd results can still be accessed after the deadline is complete, with a DeadlineExceeded exception only raised after bufferd results are exhausted. If None, defaults to the Table's default_operation_timeout - - per_row_timeout: the time budget for a single row read, in seconds. If a row takes - longer than per_row_timeout to complete, the ongoing network request will be with a - DeadlineExceeded exception, and a retry may be attempted - Applies only to the underlying network call. - If None, defaults to the Table's default_per_row_timeout - per_request_timeout: the time budget for an individual network request, in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted. 
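The docstring above distinguishes two budgets: operation_timeout bounds the whole logical read, retries included, while per_request_timeout bounds each underlying RPC attempt (and, per the hunk that follows, must not be greater than the operation budget). A minimal usage sketch, assuming ambient Google Cloud credentials and using placeholder instance and table IDs:

import asyncio

from google.cloud.bigtable.client import BigtableDataClient
from google.cloud.bigtable.read_rows_query import ReadRowsQuery


async def main():
    async with BigtableDataClient() as client:
        table = client.get_table("my-instance", "my-table")  # placeholder IDs
        query = ReadRowsQuery(limit=10)
        stream = await table.read_rows_stream(
            query,
            operation_timeout=60,   # budget for the whole read, including retries
            per_request_timeout=5,  # budget for each individual RPC attempt
        )
        async for row in stream:
            print(row.row_key)


asyncio.run(main())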
@@ -455,17 +441,16 @@ async def read_rows_stream( """ operation_timeout = operation_timeout or self.default_operation_timeout - per_row_timeout = per_row_timeout or self.default_per_row_timeout per_request_timeout = per_request_timeout or self.default_per_request_timeout if operation_timeout <= 0: raise ValueError("operation_timeout must be greater than 0") - if per_row_timeout is not None and per_row_timeout <= 0: - raise ValueError("per_row_timeout must be greater than 0") if per_request_timeout is not None and per_request_timeout <= 0: raise ValueError("per_request_timeout must be greater than 0") if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must be less than operation_timeout") + raise ValueError("per_request_timeout must not be greater than operation_timeout") + if per_request_timeout is None: + per_request_timeout = operation_timeout request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_name if self.app_profile_id: @@ -474,14 +459,13 @@ async def read_rows_stream( # read_rows smart retries is implemented using a series of iterators: # - client.read_rows: outputs raw ReadRowsResponse objects from backend. Has per_request_timeout # - ReadRowsOperation.merge_row_response_stream: parses chunks into rows - # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, per_row_timeout, per_row_timeout + # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, per_request_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute row_merger = _ReadRowsOperation( request, self.client._gapic_client, buffer_size=buffer_size, operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout, ) output_generator = ReadRowsIterator(row_merger) @@ -495,7 +479,6 @@ async def read_rows( query: ReadRowsQuery | dict[str, Any], *, operation_timeout: float | None = None, - per_row_timeout: float | None = None, per_request_timeout: float | None = None, ) -> list[Row]: """ @@ -509,7 +492,6 @@ async def read_rows( row_generator = await self.read_rows_stream( query, operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout, ) results = [row async for row in row_generator] @@ -539,7 +521,6 @@ async def read_rows_sharded( limit: int | None, buffer_size: int | None = None, operation_timeout: int | float | None = 60, - per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, ) -> ReadRowsIterator: """ diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index bb4070393..7d7f963e4 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -24,8 +24,9 @@ from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable_v2.types import RequestStats +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable.exceptions import RetryExceptionGroup from google.cloud.bigtable.exceptions import IdleTimeout -from google.cloud.bigtable.exceptions import _convert_retry_deadline from google.cloud.bigtable.row import Row @@ -35,7 +36,8 @@ class ReadRowsIterator(AsyncIterable[Row]): """ def __init__(self, merger: _ReadRowsOperation): - self._merger_or_error: _ReadRowsOperation | Exception = merger + self._merger: _ReadRowsOperation = merger + self._error: Exception | 
None = None self.request_stats: RequestStats | None = None self.last_interaction_time = time.time() self._idle_timeout_task: asyncio.Task[None] | None = None @@ -64,7 +66,7 @@ def active(self): """ Returns True if the iterator is still active and has not been closed """ - return not isinstance(self._merger_or_error, Exception) + return self._error is None async def _idle_timeout_coroutine(self, idle_timeout: float): """ @@ -96,25 +98,33 @@ async def __anext__(self) -> Row: Return the next item in the stream if active, or raise an exception if the stream has been closed. """ - if isinstance(self._merger_or_error, Exception): - raise self._merger_or_error - else: - merger = cast(_ReadRowsOperation, self._merger_or_error) - try: - self.last_interaction_time = time.time() - # convert RetryErrors into DeadlineExceeded while calling anext() - deadline_wrapped_next = _convert_retry_deadline( - merger.__anext__, merger.operation_timeout, merger.transient_errors + if self._error is not None: + raise self._error + try: + self.last_interaction_time = time.time() + next_item = await self._merger.__anext__() + if isinstance(next_item, RequestStats): + self.request_stats = next_item + return await self.__anext__() + else: + return next_item + except core_exceptions.RetryError: + # raised by AsyncRetry after operation deadline exceeded + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {self._merger.operation_timeout:0.1f}s exceeded" + ) + source_exc = None + if self._merger.transient_errors: + source_exc = RetryExceptionGroup( + f"{len(self._merger.transient_errors)} failed attempts", + self._merger.transient_errors, ) - next_item = await deadline_wrapped_next() - if isinstance(next_item, RequestStats): - self.request_stats = next_item - return await self.__anext__() - else: - return next_item - except Exception as e: - await self._finish_with_error(e) - raise e + new_exc.__cause__ = source_exc + await self._finish_with_error(new_exc) + raise new_exc from source_exc + except Exception as e: + await self._finish_with_error(e) + raise e async def _finish_with_error(self, e: Exception): """ @@ -122,9 +132,8 @@ async def _finish_with_error(self, e: Exception): after an error has occurred. 
""" if self.active: - merger = cast(_ReadRowsOperation, self._merger_or_error) - await merger.aclose() - self._merger_or_error = e + await self._merger.aclose() + self._error = e if self._idle_timeout_task is not None: self._idle_timeout_task.cancel() self._idle_timeout_task = None diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 4ff59bff9..3bb5b2ed6 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -15,7 +15,6 @@ from __future__ import annotations from dataclasses import dataclass -from google.cloud.bigtable.row import family_id, qualifier, row_key class Mutation: @@ -24,23 +23,23 @@ class Mutation: @dataclass class SetCell(Mutation): - family: family_id - column_qualifier: qualifier + family: str + column_qualifier: bytes new_value: bytes | str | int timestamp_ms: int | None = None @dataclass class DeleteRangeFromColumn(Mutation): - family: family_id - column_qualifier: qualifier + family: str + column_qualifier: bytes start_timestamp_ms: int end_timestamp_ms: int @dataclass class DeleteAllFromFamily(Mutation): - family_to_delete: family_id + family_to_delete: str @dataclass @@ -50,5 +49,5 @@ class DeleteAllFromRow(Mutation): @dataclass class BulkMutationsEntry: - row: row_key + row: bytes mutations: list[Mutation] | Mutation diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 582786ee4..9681f4382 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -18,12 +18,14 @@ from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import Mutation -from google.cloud.bigtable.row import row_key from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover +# Type alias used internally for readability. +_row_key_type = bytes + class MutationsBatcher: """ @@ -44,7 +46,7 @@ class MutationsBatcher: batcher.add(row, mut) """ - queue: asyncio.Queue[tuple[row_key, list[Mutation]]] + queue: asyncio.Queue[tuple[_row_key_type, list[Mutation]]] conditional_queues: dict[RowFilter, tuple[list[Mutation], list[Mutation]]] MB_SIZE = 1024 * 1024 diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 559b47f04..e26f99d34 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -14,7 +14,6 @@ # from __future__ import annotations from typing import TYPE_CHECKING, Any -from .row import row_key from dataclasses import dataclass from google.cloud.bigtable.row_filters import RowFilter @@ -26,7 +25,7 @@ class _RangePoint: """Model class for a point in a row range""" - key: row_key + key: bytes is_inclusive: bool diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 2fe7cf58c..a7c723a1b 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -15,14 +15,12 @@ from __future__ import annotations from collections import OrderedDict -from typing import Sequence, Generator, overload, Any +from typing import Sequence, Generator, overload, Any, Set from functools import total_ordering # Type aliases used internally for readability. 
-row_key = bytes -family_id = str -qualifier = bytes -row_value = bytes +_family_type = str +_qualifier_type = bytes class Row(Sequence["Cell"]): @@ -37,9 +35,11 @@ class Row(Sequence["Cell"]): cells = row["family", "qualifier"] """ + __slots__ = ("row_key", "cells", "_index_data") + def __init__( self, - key: row_key, + key: bytes, cells: list[Cell], ): """ @@ -49,23 +49,28 @@ def __init__( They are returned by the Bigtable backend. """ self.row_key = key - self._cells_map: dict[family_id, dict[qualifier, list[Cell]]] = OrderedDict() - self._cells_list: list[Cell] = [] - # add cells to internal stores using Bigtable native ordering - for cell in cells: - if cell.family not in self._cells_map: - self._cells_map[cell.family] = OrderedDict() - if cell.column_qualifier not in self._cells_map[cell.family]: - self._cells_map[cell.family][cell.column_qualifier] = [] - self._cells_map[cell.family][cell.column_qualifier].append(cell) - self._cells_list.append(cell) + self.cells: list[Cell] = cells + # index is lazily created when needed + self._index_data: OrderedDict[ + _family_type, OrderedDict[_qualifier_type, list[Cell]] + ] | None = None @property - def cells(self) -> list[Cell]: + def _index( + self, + ) -> OrderedDict[_family_type, OrderedDict[_qualifier_type, list[Cell]]]: """ - Returns a list of all cells in the row + Returns an index of cells associated with each family and qualifier. + + The index is lazily created when needed """ - return self.get_cells() + if self._index_data is None: + self._index_data = OrderedDict() + for cell in self.cells: + self._index_data.setdefault(cell.family, OrderedDict()).setdefault( + cell.qualifier, [] + ).append(cell) + return self._index_data def get_cells( self, family: str | None = None, qualifier: str | bytes | None = None @@ -88,31 +93,29 @@ def get_cells( raise ValueError("Qualifier passed without family") else: # return all cells on get_cells() - return self._cells_list + return self.cells if qualifier is None: # return all cells in family on get_cells(family) return list(self._get_all_from_family(family)) if isinstance(qualifier, str): qualifier = qualifier.encode("utf-8") # return cells in family and qualifier on get_cells(family, qualifier) - if family not in self._cells_map: + if family not in self._index: raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") - if qualifier not in self._cells_map[family]: + if qualifier not in self._index[family]: raise ValueError( f"Qualifier '{qualifier!r}' not found in family '{family}' in row '{self.row_key!r}'" ) - return self._cells_map[family][qualifier] + return self._index[family][qualifier] - def _get_all_from_family(self, family: family_id) -> Generator[Cell, None, None]: + def _get_all_from_family(self, family: str) -> Generator[Cell, None, None]: """ Returns all cells in the row for the family_id """ - if family not in self._cells_map: + if family not in self._index: raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") - qualifier_dict = self._cells_map.get(family, {}) - for cell_batch in qualifier_dict.values(): - for cell in cell_batch: - yield cell + for qualifier in self._index[family]: + yield from self._index[family][qualifier] def __str__(self) -> str: """ @@ -155,25 +158,23 @@ def to_dict(self) -> dict[str, Any]: https://cloud.google.com/bigtable/docs/reference/data/rpc/google.bigtable.v2#row """ - families_list: list[dict[str, Any]] = [] - for family in self._cells_map: - column_list: list[dict[str, Any]] = [] - for qualifier in 
self._cells_map[family]: - cells_list: list[dict[str, Any]] = [] - for cell in self._cells_map[family][qualifier]: - cells_list.append(cell.to_dict()) - column_list.append({"qualifier": qualifier, "cells": cells_list}) - families_list.append({"name": family, "columns": column_list}) - return {"key": self.row_key, "families": families_list} + family_list = [] + for family_name, qualifier_dict in self._index.items(): + qualifier_list = [] + for qualifier_name, cell_list in qualifier_dict.items(): + cell_dicts = [cell.to_dict() for cell in cell_list] + qualifier_list.append( + {"qualifier": qualifier_name, "cells": cell_dicts} + ) + family_list.append({"name": family_name, "columns": qualifier_list}) + return {"key": self.row_key, "families": family_list} # Sequence and Mapping methods def __iter__(self): """ Allow iterating over all cells in the row """ - # iterate as a sequence; yield all cells - for cell in self._cells_list: - yield cell + return iter(self.cells) def __contains__(self, item): """ @@ -182,24 +183,22 @@ def __contains__(self, item): Works for both cells in the internal list, and `family` or `(family, qualifier)` pairs associated with the cells """ - if isinstance(item, family_id): - # check if family key is in Row - return item in self._cells_map + if isinstance(item, _family_type): + return item in self._index elif ( isinstance(item, tuple) - and isinstance(item[0], family_id) - and isinstance(item[1], (qualifier, str)) + and isinstance(item[0], _family_type) + and isinstance(item[1], (bytes, str)) ): - # check if (family, qualifier) pair is in Row - qualifer = item[1] if isinstance(item[1], bytes) else item[1].encode() - return item[0] in self._cells_map and qualifer in self._cells_map[item[0]] + q = item[1] if isinstance(item[1], bytes) else item[1].encode("utf-8") + return item[0] in self._index and q in self._index[item[0]] # check if Cell is in Row - return item in self._cells_list + return item in self.cells @overload def __getitem__( self, - index: family_id | tuple[family_id, qualifier | str], + index: str | tuple[str, bytes | str], ) -> list[Cell]: # overload signature for type checking pass @@ -221,17 +220,17 @@ def __getitem__(self, index): Supports indexing by family, (family, qualifier) pair, numerical index, and index slicing """ - if isinstance(index, family_id): + if isinstance(index, _family_type): return self.get_cells(family=index) elif ( isinstance(index, tuple) - and isinstance(index[0], family_id) - and isinstance(index[1], (qualifier, str)) + and isinstance(index[0], _family_type) + and isinstance(index[1], (bytes, str)) ): return self.get_cells(family=index[0], qualifier=index[1]) elif isinstance(index, int) or isinstance(index, slice): # index is int or slice - return self._cells_list[index] + return self.cells[index] else: raise TypeError( "Index must be family_id, (family_id, qualifier), int, or slice" @@ -241,19 +240,15 @@ def __len__(self): """ Implements `len()` operator """ - return len(self._cells_list) + return len(self.cells) - def get_column_components(self): + def get_column_components(self) -> list[tuple[str, bytes]]: """ Returns a list of (family, qualifier) pairs associated with the cells Pairs can be used for indexing """ - key_list = [] - for family in self._cells_map: - for qualifier in self._cells_map[family]: - key_list.append((family, qualifier)) - return key_list + return [(f, q) for f in self._index for q in self._index[f]] def __eq__(self, other): """ @@ -265,7 +260,7 @@ def __eq__(self, other): return False if self.row_key != 
other.row_key: return False - if len(self._cells_list) != len(other._cells_list): + if len(self.cells) != len(other.cells): return False components = self.get_column_components() other_components = other.get_column_components() @@ -277,7 +272,7 @@ def __eq__(self, other): if len(self[family, qualifier]) != len(other[family, qualifier]): return False # compare individual cell lists - if self._cells_list != other._cells_list: + if self.cells != other.cells: return False return True @@ -313,12 +308,21 @@ class Cell: Expected to be read-only to users, and written by backend """ + __slots__ = ( + "value", + "row_key", + "family", + "qualifier", + "timestamp_micros", + "labels", + ) + def __init__( self, - value: row_value, - row: row_key, - family: family_id, - column_qualifier: qualifier | str, + value: bytes, + row_key: bytes, + family: str, + qualifier: bytes | str, timestamp_micros: int, labels: list[str] | None = None, ): @@ -329,11 +333,11 @@ def __init__( They are returned by the Bigtable backend. """ self.value = value - self.row_key = row + self.row_key = row_key self.family = family - if isinstance(column_qualifier, str): - column_qualifier = column_qualifier.encode() - self.column_qualifier = column_qualifier + if isinstance(qualifier, str): + qualifier = qualifier.encode() + self.qualifier = qualifier self.timestamp_micros = timestamp_micros self.labels = labels if labels is not None else [] @@ -371,7 +375,7 @@ def __repr__(self): """ Returns a string representation of the cell """ - return f"Cell(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" + return f"Cell(value={self.value!r}, row_key={self.row_key!r}, family='{self.family}', qualifier={self.qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" """For Bigtable native ordering""" @@ -383,14 +387,14 @@ def __lt__(self, other) -> bool: return NotImplemented this_ordering = ( self.family, - self.column_qualifier, + self.qualifier, -self.timestamp_micros, self.value, self.labels, ) other_ordering = ( other.family, - other.column_qualifier, + other.qualifier, -other.timestamp_micros, other.value, other.labels, @@ -406,7 +410,7 @@ def __eq__(self, other) -> bool: return ( self.row_key == other.row_key and self.family == other.family - and self.column_qualifier == other.column_qualifier + and self.qualifier == other.qualifier and self.value == other.value and self.timestamp_micros == other.timestamp_micros and len(self.labels) == len(other.labels) @@ -427,7 +431,7 @@ def __hash__(self): ( self.row_key, self.family, - self.column_qualifier, + self.qualifier, self.value, self.timestamp_micros, tuple(self.labels), diff --git a/python-api-core b/python-api-core index 0423ebe68..6104c5961 160000 --- a/python-api-core +++ b/python-api-core @@ -1 +1 @@ -Subproject commit 0423ebe6810ba7a4da024efaa0fd78f55bd26128 +Subproject commit 6104c59616380981b0b2510eb1ad2a49bac71aa8 diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 82162febf..4925f1d18 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -8,7 +8,7 @@ from google.cloud.bigtable._read_rows import AWAITING_CELL_VALUE TEST_FAMILY = "family_name" -TEST_QUALIFIER = b"column_qualifier" +TEST_QUALIFIER = b"qualifier" TEST_TIMESTAMP = 123456789 TEST_LABELS = ["label1", "label2"] @@ -34,18 +34,17 @@ def test_ctor_defaults(self): client = mock.Mock() client.read_rows = mock.Mock() 
client.read_rows.return_value = None - instance = self._make_one(request, client, 10) + instance = self._make_one(request, client) assert instance.transient_errors == [] assert instance._last_seen_row_key is None assert instance._emit_count == 0 - assert instance.operation_timeout == 10 + assert instance.operation_timeout == None retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows assert retryable_fn.args[1] == 0 assert retryable_fn.args[2] is None - assert retryable_fn.args[3] is None - assert retryable_fn.args[4] == 0 + assert retryable_fn.args[3] == 0 assert client.read_rows.call_count == 0 def test_ctor(self): @@ -56,14 +55,12 @@ def test_ctor(self): client.read_rows.return_value = None expected_buffer_size = 21 expected_operation_timeout = 42 - expected_row_timeout = 43 expected_request_timeout = 44 instance = self._make_one( request, client, buffer_size=expected_buffer_size, operation_timeout=expected_operation_timeout, - per_row_timeout=expected_row_timeout, per_request_timeout=expected_request_timeout, ) assert instance.transient_errors == [] @@ -74,16 +71,15 @@ def test_ctor(self): assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows assert retryable_fn.args[1] == expected_buffer_size - assert retryable_fn.args[2] == expected_row_timeout - assert retryable_fn.args[3] == expected_request_timeout - assert retryable_fn.args[4] == row_limit + assert retryable_fn.args[2] == expected_request_timeout + assert retryable_fn.args[3] == row_limit assert client.read_rows.call_count == 0 def test___aiter__(self): request = {} client = mock.Mock() client.read_rows = mock.Mock() - instance = self._make_one(request, client, 10) + instance = self._make_one(request, client) assert instance.__aiter__() is instance @pytest.mark.asyncio @@ -95,7 +91,7 @@ async def test_transient_error_capture(self): test_exc = core_exceptions.Aborted("test") test_exc2 = core_exceptions.DeadlineExceeded("test") client.read_rows.side_effect = [test_exc, test_exc2] - instance = self._make_one({}, client, 10) + instance = self._make_one({}, client) with pytest.raises(RuntimeError): await instance.__anext__() assert len(instance.transient_errors) == 2 @@ -207,13 +203,13 @@ def test_revise_to_empty_rowset(self): @pytest.mark.asyncio async def test_revise_limit(self, start_limit, emit_num, expected_limit): request = {"rows_limit": start_limit} - instance = self._make_one(request, mock.Mock(), 10) + instance = self._make_one(request, mock.Mock()) instance._emit_count = emit_num instance._last_seen_row_key = "a" gapic_mock = mock.Mock() gapic_mock.side_effect = [RuntimeError("stop_fn")] attempt = instance._read_rows_retryable_attempt( - gapic_mock, 0, None, None, start_limit + gapic_mock, 0, None, start_limit ) if start_limit != 0 and expected_limit == 0: # if we emitted the expected number of rows, we should receive a StopAsyncIteration @@ -313,7 +309,7 @@ async def test_generator(): async def test_aclose(self): import asyncio - instance = self._make_one({}, mock.Mock(), 10) + instance = self._make_one({}, mock.Mock()) await instance.aclose() assert instance._stream is None assert instance._last_seen_row_key is None @@ -330,7 +326,7 @@ async def test_retryable_attempt_hit_limit(self, limit): """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - instance = self._make_one({}, mock.Mock(), 10) + instance = self._make_one({}, mock.Mock()) async 
def mock_gapic(*args, **kwargs): # continuously return a single row @@ -346,7 +342,7 @@ async def gen(): return gen() - gen = instance._read_rows_retryable_attempt(mock_gapic, 0, None, None, limit) + gen = instance._read_rows_retryable_attempt(mock_gapic, 0, None, limit) # should yield values up to the limit for i in range(limit): await gen.__anext__() @@ -372,7 +368,7 @@ async def mock_stream(): _ReadRowsOperation, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() - instance = self._make_one({}, mock.AsyncMock(), 10) + instance = self._make_one({}, mock.AsyncMock()) first_row = await instance.__anext__() assert first_row.row_key == b"dup_key" second_row = await instance.__anext__() @@ -396,7 +392,7 @@ async def mock_stream(): _ReadRowsOperation, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() - instance = self._make_one({}, mock.AsyncMock(), 10) + instance = self._make_one({}, mock.AsyncMock()) first_row = await instance.__anext__() assert first_row.row_key == b"key1" second_row = await instance.__anext__() @@ -418,7 +414,7 @@ def test_ctor(self): instance = self._make_one() assert instance.last_seen_row_key is None - assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_state == AWAITING_NEW_ROW assert instance.current_family is None assert instance.current_qualifier is None assert isinstance(instance.adapter, _RowBuilder) @@ -431,11 +427,11 @@ def test_is_terminal_state(self): instance = self._make_one() assert instance.is_terminal_state() is True - instance.current_state = AWAITING_NEW_ROW(None) + instance.current_state = AWAITING_NEW_ROW assert instance.is_terminal_state() is True - instance.current_state = AWAITING_NEW_CELL(None) + instance.current_state = AWAITING_NEW_CELL assert instance.is_terminal_state() is False - instance.current_state = AWAITING_CELL_VALUE(None) + instance.current_state = AWAITING_CELL_VALUE assert instance.is_terminal_state() is False def test__reset_row(self): @@ -445,7 +441,7 @@ def test__reset_row(self): instance.current_qualifier = "qualifier" instance.adapter = mock.Mock() instance._reset_row() - assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_state == AWAITING_NEW_ROW assert instance.current_family is None assert instance.current_qualifier is None assert instance.adapter.reset.call_count == 1 @@ -454,11 +450,11 @@ def test_handle_last_scanned_row_wrong_state(self): from google.cloud.bigtable.exceptions import InvalidChunk instance = self._make_one() - instance.current_state = AWAITING_NEW_CELL(None) + instance.current_state = AWAITING_NEW_CELL with pytest.raises(InvalidChunk) as e: instance.handle_last_scanned_row("row_key") assert e.value.args[0] == "Last scanned row key received in invalid state" - instance.current_state = AWAITING_CELL_VALUE(None) + instance.current_state = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: instance.handle_last_scanned_row("row_key") assert e.value.args[0] == "Last scanned row key received in invalid state" @@ -485,7 +481,7 @@ def test_handle_last_scanned_row(self): assert instance.last_seen_row_key == b"b" assert isinstance(output_row, _LastScannedRow) assert output_row.row_key == b"b" - assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_state == AWAITING_NEW_ROW assert instance.current_family is None assert instance.current_qualifier is None assert instance.adapter.reset.call_count == 1 @@ -500,7 +496,7 @@ def 
test__handle_complete_row(self): instance.adapter = mock.Mock() instance._handle_complete_row(Row(b"row_key", {})) assert instance.last_seen_row_key == b"row_key" - assert isinstance(instance.current_state, AWAITING_NEW_ROW) + assert instance.current_state == AWAITING_NEW_ROW assert instance.current_family is None assert instance.current_qualifier is None assert instance.adapter.reset.call_count == 1 @@ -580,8 +576,8 @@ def handle_chunk_with_commit_wrong_state(self, state): mock_state_handle.return_value = state(mock.Mock()) with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(commit_row=True)._pb - instance.handle_chunk(chunk) - assert isinstance(instance.current_state, state) + instance.handle_chunk(mock.Mock(), chunk) + assert instance.current_state == state assert e.value.args[0] == "Commit chunk received with in invalid state" def test_handle_chunk_with_commit(self): @@ -597,7 +593,7 @@ def test_handle_chunk_with_commit(self): assert isinstance(output, Row) assert output.row_key == b"row_key" assert output[0].family == "f" - assert output[0].column_qualifier == b"q" + assert output[0].qualifier == b"q" assert instance.last_seen_row_key == b"row_key" assert mock_reset.call_count == 1 @@ -614,7 +610,7 @@ def test_handle_chunk_with_commit_empty_strings(self): assert isinstance(output, Row) assert output.row_key == b"row_key" assert output[0].family == "" - assert output[0].column_qualifier == b"" + assert output[0].qualifier == b"" assert instance.last_seen_row_key == b"row_key" assert mock_reset.call_count == 1 @@ -636,14 +632,14 @@ class TestState(unittest.TestCase): def test_AWAITING_NEW_ROW_empty_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - instance = AWAITING_NEW_ROW(mock.Mock()) + instance = AWAITING_NEW_ROW with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(row_key=b"")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(mock.Mock(), chunk) assert "missing a row key" in e.value.args[0] with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(chunk) + instance.handle_chunk(mock.Mock(), chunk) assert "missing a row key" in e.value.args[0] def test_AWAITING_NEW_ROW(self): @@ -653,13 +649,14 @@ def test_AWAITING_NEW_ROW(self): """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - instance = AWAITING_NEW_ROW(mock.Mock()) + instance = AWAITING_NEW_ROW + state_machine = mock.Mock() with mock.patch.object(AWAITING_NEW_CELL, "handle_chunk") as mock_delegate: chunk = ReadRowsResponse.CellChunk(row_key=b"row_key")._pb - instance.handle_chunk(chunk) - assert instance._owner.adapter.start_row.call_count == 1 - assert instance._owner.adapter.start_row.call_args[0][0] == b"row_key" - mock_delegate.assert_called_once_with(chunk) + instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.start_row.call_count == 1 + assert state_machine.adapter.start_row.call_args[0][0] == b"row_key" + mock_delegate.assert_called_once_with(state_machine, chunk) def test_AWAITING_NEW_CELL_family_without_qualifier(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse @@ -667,10 +664,10 @@ def test_AWAITING_NEW_CELL_family_without_qualifier(self): state_machine = _StateMachine() state_machine.current_qualifier = b"q" - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(family_name="fam")._pb - instance.handle_chunk(chunk) + 
instance.handle_chunk(state_machine, chunk) assert "New family must specify qualifier" in e.value.args[0] def test_AWAITING_NEW_CELL_qualifier_without_family(self): @@ -678,10 +675,10 @@ def test_AWAITING_NEW_CELL_qualifier_without_family(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(qualifier=b"q")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "Family not found" in e.value.args[0] def test_AWAITING_NEW_CELL_no_row_state(self): @@ -689,15 +686,15 @@ def test_AWAITING_NEW_CELL_no_row_state(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "Missing family for new cell" in e.value.args[0] state_machine.current_family = "fam" with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "Missing qualifier for new cell" in e.value.args[0] def test_AWAITING_NEW_CELL_invalid_row_key(self): @@ -705,11 +702,11 @@ def test_AWAITING_NEW_CELL_invalid_row_key(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL state_machine.adapter.current_key = b"abc" with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "Row key changed mid row" in e.value.args[0] def test_AWAITING_NEW_CELL_success_no_split(self): @@ -718,7 +715,7 @@ def test_AWAITING_NEW_CELL_success_no_split(self): state_machine = _StateMachine() state_machine.adapter = mock.Mock() - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL row_key = b"row_key" family = "fam" qualifier = b"q" @@ -734,7 +731,7 @@ def test_AWAITING_NEW_CELL_success_no_split(self): labels=labels, )._pb state_machine.adapter.current_key = row_key - new_state = instance.handle_chunk(chunk) + new_state = instance.handle_chunk(state_machine, chunk) assert state_machine.adapter.start_cell.call_count == 1 kwargs = state_machine.adapter.start_cell.call_args[1] assert kwargs["family"] == family @@ -744,7 +741,7 @@ def test_AWAITING_NEW_CELL_success_no_split(self): assert state_machine.adapter.cell_value.call_count == 1 assert state_machine.adapter.cell_value.call_args[0][0] == value assert state_machine.adapter.finish_cell.call_count == 1 - assert isinstance(new_state, AWAITING_NEW_CELL) + assert new_state == AWAITING_NEW_CELL def test_AWAITING_NEW_CELL_success_with_split(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse @@ -752,7 +749,7 @@ def test_AWAITING_NEW_CELL_success_with_split(self): state_machine = _StateMachine() state_machine.adapter = mock.Mock() - instance = AWAITING_NEW_CELL(state_machine) + instance = AWAITING_NEW_CELL row_key = b"row_key" family = "fam" qualifier = b"q" @@ -769,7 +766,7 @@ def test_AWAITING_NEW_CELL_success_with_split(self): labels=labels, )._pb state_machine.adapter.current_key = row_key - new_state = instance.handle_chunk(chunk) + 
new_state = instance.handle_chunk(state_machine, chunk) assert state_machine.adapter.start_cell.call_count == 1 kwargs = state_machine.adapter.start_cell.call_args[1] assert kwargs["family"] == family @@ -779,17 +776,17 @@ def test_AWAITING_NEW_CELL_success_with_split(self): assert state_machine.adapter.cell_value.call_count == 1 assert state_machine.adapter.cell_value.call_args[0][0] == value assert state_machine.adapter.finish_cell.call_count == 0 - assert isinstance(new_state, AWAITING_CELL_VALUE) + assert new_state == AWAITING_CELL_VALUE def test_AWAITING_CELL_VALUE_w_row_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "In progress cell had a row key" in e.value.args[0] def test_AWAITING_CELL_VALUE_w_family(self): @@ -797,10 +794,10 @@ def test_AWAITING_CELL_VALUE_w_family(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(family_name="")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "In progress cell had a family name" in e.value.args[0] def test_AWAITING_CELL_VALUE_w_qualifier(self): @@ -808,10 +805,10 @@ def test_AWAITING_CELL_VALUE_w_qualifier(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(qualifier=b"")._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "In progress cell had a qualifier" in e.value.args[0] def test_AWAITING_CELL_VALUE_w_timestamp(self): @@ -819,10 +816,10 @@ def test_AWAITING_CELL_VALUE_w_timestamp(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(timestamp_micros=123)._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "In progress cell had a timestamp" in e.value.args[0] def test_AWAITING_CELL_VALUE_w_labels(self): @@ -830,10 +827,10 @@ def test_AWAITING_CELL_VALUE_w_labels(self): from google.cloud.bigtable._read_rows import _StateMachine state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE with pytest.raises(InvalidChunk) as e: chunk = ReadRowsResponse.CellChunk(labels=[""])._pb - instance.handle_chunk(chunk) + instance.handle_chunk(state_machine, chunk) assert "In progress cell had labels" in e.value.args[0] def test_AWAITING_CELL_VALUE_continuation(self): @@ -842,14 +839,14 @@ def test_AWAITING_CELL_VALUE_continuation(self): state_machine = _StateMachine() state_machine.adapter = mock.Mock() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE value = b"value" chunk = ReadRowsResponse.CellChunk(value=value, value_size=1)._pb - new_state = instance.handle_chunk(chunk) + new_state = 
instance.handle_chunk(state_machine, chunk) assert state_machine.adapter.cell_value.call_count == 1 assert state_machine.adapter.cell_value.call_args[0][0] == value assert state_machine.adapter.finish_cell.call_count == 0 - assert isinstance(new_state, AWAITING_CELL_VALUE) + assert new_state == AWAITING_CELL_VALUE def test_AWAITING_CELL_VALUE_final_chunk(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse @@ -857,14 +854,14 @@ def test_AWAITING_CELL_VALUE_final_chunk(self): state_machine = _StateMachine() state_machine.adapter = mock.Mock() - instance = AWAITING_CELL_VALUE(state_machine) + instance = AWAITING_CELL_VALUE value = b"value" chunk = ReadRowsResponse.CellChunk(value=value, value_size=0)._pb - new_state = instance.handle_chunk(chunk) + new_state = instance.handle_chunk(state_machine, chunk) assert state_machine.adapter.cell_value.call_count == 1 assert state_machine.adapter.cell_value.call_args[0][0] == value assert state_machine.adapter.finish_cell.call_count == 1 - assert isinstance(new_state, AWAITING_NEW_CELL) + assert new_state == AWAITING_NEW_CELL class TestRowBuilder(unittest.TestCase): @@ -911,7 +908,7 @@ def test_start_cell(self): row_builder.start_row(b"row_key") row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) self.assertEqual(row_builder.working_cell.family, TEST_FAMILY) - self.assertEqual(row_builder.working_cell.column_qualifier, TEST_QUALIFIER) + self.assertEqual(row_builder.working_cell.qualifier, TEST_QUALIFIER) self.assertEqual(row_builder.working_cell.timestamp_micros, TEST_TIMESTAMP) self.assertEqual(row_builder.working_cell.labels, TEST_LABELS) self.assertEqual(row_builder.working_value, b"") @@ -937,7 +934,7 @@ def test_finish_cell(self): self.assertEqual(len(row_builder.completed_cells), 1) self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) self.assertEqual( - row_builder.completed_cells[0].column_qualifier, TEST_QUALIFIER + row_builder.completed_cells[0].qualifier, TEST_QUALIFIER ) self.assertEqual( row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP @@ -954,7 +951,7 @@ def test_finish_cell(self): self.assertEqual(len(row_builder.completed_cells), 2) self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) self.assertEqual( - row_builder.completed_cells[1].column_qualifier, TEST_QUALIFIER + row_builder.completed_cells[1].qualifier, TEST_QUALIFIER ) self.assertEqual( row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP @@ -993,7 +990,7 @@ def test_finish_row(self): self.assertEqual(len(output), 3) for i in range(3): self.assertEqual(output[i].family, str(i)) - self.assertEqual(output[i].column_qualifier, TEST_QUALIFIER) + self.assertEqual(output[i].qualifier, TEST_QUALIFIER) self.assertEqual(output[i].timestamp_micros, TEST_TIMESTAMP) self.assertEqual(output[i].labels, TEST_LABELS) self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d374b34d8..835fa1595 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -789,7 +789,6 @@ async def test_table_ctor(self): expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" expected_operation_timeout = 123 - expected_per_row_timeout = 21 expected_per_request_timeout = 12 client = BigtableDataClient() assert not client._active_instances @@ -800,7 +799,6 @@ async def test_table_ctor(self): expected_table_id, expected_app_profile_id, 
default_operation_timeout=expected_operation_timeout, - default_per_row_timeout=expected_per_row_timeout, default_per_request_timeout=expected_per_request_timeout, ) await asyncio.sleep(0) @@ -810,7 +808,6 @@ async def test_table_ctor(self): assert table.client is client assert table.instance_name in client._active_instances assert table.default_operation_timeout == expected_operation_timeout - assert table.default_per_row_timeout == expected_per_row_timeout assert table.default_per_request_timeout == expected_per_request_timeout # ensure task reaches completion await table._register_instance_task @@ -826,9 +823,6 @@ async def test_table_ctor_bad_timeout_values(self): client = BigtableDataClient() - with pytest.raises(ValueError) as e: - Table(client, "", "", default_per_row_timeout=-1) - assert "default_per_row_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: Table(client, "", "", default_per_request_timeout=-1) assert "default_per_request_timeout must be greater than 0" in str(e.value) @@ -1048,69 +1042,26 @@ async def test_read_rows_timeout(self, operation_timeout): == f"operation_timeout of {operation_timeout:0.1f}s exceeded" ) - @pytest.mark.parametrize( - "per_row_t, operation_t, expected_num", - [ - (0.1, 0.01, 0), - (0.1, 0.19, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), - ], - ) - @pytest.mark.asyncio - async def test_read_rows_per_row_timeout( - self, per_row_t, operation_t, expected_num - ): - from google.cloud.bigtable.exceptions import RetryExceptionGroup - - # mocking uniform ensures there are no sleeps between retries - with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with self._make_client() as client: - table = client.get_table("instance", "table") - query = ReadRowsQuery() - chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=5 - ) - ) - try: - await table.read_rows( - query, - per_row_timeout=per_row_t, - operation_timeout=operation_t, - ) - except core_exceptions.DeadlineExceeded as deadline_exc: - retry_exc = deadline_exc.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert ( - sub_exc.message - == f"per_row_timeout of {per_row_t:0.1f}s exceeded" - ) - @pytest.mark.parametrize( "per_request_t, operation_t, expected_num", [ - (0.05, 0.09, 1), - (0.05, 0.54, 10), - (0.05, 0.14, 2), - (0.05, 0.24, 4), + (0.05, 0.08, 2), + (0.05, 0.54, 11), + (0.05, 0.14, 3), + (0.05, 0.24, 5), ], ) @pytest.mark.asyncio async def test_read_rows_per_request_timeout( self, per_request_t, operation_t, expected_num ): + """ + Ensures that the per_request_timeout is respected and that the number of + requests is as expected. + + operation_timeout does not cancel the request, so we expect the number of + requests to be the ceiling of operation_timeout / per_request_timeout. 
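+
+        As an illustrative check of that claim (derived from the parametrized
+        cases above, not an additional assertion): with per_request_t=0.05 and
+        operation_t=0.14, ceil(0.14 / 0.05) = 3 attempts are expected.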
+ """ from google.cloud.bigtable.exceptions import RetryExceptionGroup # mocking uniform ensures there are no sleeps between retries @@ -1143,7 +1094,7 @@ async def test_read_rows_per_request_timeout( assert len(retry_exc.exceptions) == expected_num for sub_exc in retry_exc.exceptions: assert sub_exc.message == "mock deadline" - assert read_rows.call_count == expected_num + 1 + assert read_rows.call_count == expected_num called_kwargs = read_rows.call_args[1] assert called_kwargs["timeout"] == per_request_t @@ -1181,7 +1132,7 @@ async def test_read_rows_idle_timeout(self): await asyncio.sleep(0.2) # generator should be expired assert not gen.active - assert type(gen._merger_or_error) == IdleTimeout + assert type(gen._error) == IdleTimeout assert gen._idle_timeout_task is None await client.close() with pytest.raises(IdleTimeout) as e: @@ -1335,7 +1286,6 @@ async def test_read_rows_default_timeouts(self): from google.cloud.bigtable._read_rows import _ReadRowsOperation operation_timeout = 8 - per_row_timeout = 2 per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") @@ -1344,7 +1294,6 @@ async def test_read_rows_default_timeouts(self): "instance", "table", default_operation_timeout=operation_timeout, - default_per_row_timeout=per_row_timeout, default_per_request_timeout=per_request_timeout, ) as table: try: @@ -1353,7 +1302,6 @@ async def test_read_rows_default_timeouts(self): pass kwargs = mock_op.call_args_list[0].kwargs assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout assert kwargs["per_request_timeout"] == per_request_timeout @pytest.mark.asyncio @@ -1364,7 +1312,6 @@ async def test_read_rows_default_timeout_override(self): from google.cloud.bigtable._read_rows import _ReadRowsOperation operation_timeout = 8 - per_row_timeout = 2 per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") @@ -1373,19 +1320,16 @@ async def test_read_rows_default_timeout_override(self): "instance", "table", default_operation_timeout=99, - default_per_row_timeout=98, default_per_request_timeout=97, ) as table: try: await table.read_rows( ReadRowsQuery(), operation_timeout=operation_timeout, - per_row_timeout=per_row_timeout, per_request_timeout=per_request_timeout, ) except RuntimeError: pass kwargs = mock_op.call_args_list[0].kwargs assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_row_timeout"] == per_row_timeout assert kwargs["per_request_timeout"] == per_request_timeout diff --git a/tests/unit/test_iterators.py b/tests/unit/test_iterators.py index 0bd9e8444..8640e6fc8 100644 --- a/tests/unit/test_iterators.py +++ b/tests/unit/test_iterators.py @@ -236,7 +236,7 @@ async def test__finish_with_error(self): err = ZeroDivisionError("mock error") await iterator._finish_with_error(err) assert iterator.active is False - assert iterator._merger_or_error is err + assert iterator._error is err assert iterator._idle_timeout_task is None with pytest.raises(ZeroDivisionError) as exc: await iterator.__anext__() @@ -254,7 +254,7 @@ async def test_aclose(self): assert iterator.active is True await iterator.aclose() assert iterator.active is False - assert isinstance(iterator._merger_or_error, StopAsyncIteration) + assert isinstance(iterator._error, StopAsyncIteration) assert iterator._idle_timeout_task is None with pytest.raises(StopAsyncIteration) as e: await 
iterator.__anext__() diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/test_read_rows_acceptance.py index b94548d9f..7bfb8d373 100644 --- a/tests/unit/test_read_rows_acceptance.py +++ b/tests/unit/test_read_rows_acceptance.py @@ -74,7 +74,7 @@ async def _scenerio_stream(): cell_result = ReadRowsTest.Result( row_key=cell.row_key, family_name=cell.family, - qualifier=cell.column_qualifier, + qualifier=cell.qualifier, timestamp_micros=cell.timestamp_micros, value=cell.value, label=cell.labels[0] if cell.labels else "", @@ -114,7 +114,7 @@ async def inner(): cell_result = ReadRowsTest.Result( row_key=cell.row_key, family_name=cell.family, - qualifier=cell.column_qualifier, + qualifier=cell.qualifier, timestamp_micros=cell.timestamp_micros, value=cell.value, label=cell.labels[0] if cell.labels else "", diff --git a/tests/unit/test_row.py b/tests/unit/test_row.py index 7f3d27cb9..1af09aad9 100644 --- a/tests/unit/test_row.py +++ b/tests/unit/test_row.py @@ -76,7 +76,7 @@ def test_get_cells(self): output = row_response.get_cells(family="1", qualifier=q) self.assertEqual(len(output), 1) self.assertEqual(output[0].family, "1") - self.assertEqual(output[0].column_qualifier, b"a") + self.assertEqual(output[0].qualifier, b"a") self.assertEqual(output[0], cell_list[0]) # calling with just qualifier should raise an error with self.assertRaises(ValueError): @@ -179,7 +179,6 @@ def test_to_dict(self): self.assertEqual(column.cells[1].labels, TEST_LABELS) def test_iteration(self): - from types import GeneratorType from google.cloud.bigtable.row import Cell # should be able to iterate over the Row as a list @@ -188,8 +187,6 @@ def test_iteration(self): cell3 = self._make_cell(value=b"3") row_response = self._make_one(TEST_ROW_KEY, [cell1, cell2, cell3]) self.assertEqual(len(row_response), 3) - # should create generator object - self.assertIsInstance(iter(row_response), GeneratorType) result_list = list(row_response) self.assertEqual(len(result_list), 3) # should be able to iterate over all cells @@ -486,7 +483,7 @@ def test_ctor(self): self.assertEqual(cell.value, TEST_VALUE) self.assertEqual(cell.row_key, TEST_ROW_KEY) self.assertEqual(cell.family, TEST_FAMILY_ID) - self.assertEqual(cell.column_qualifier, TEST_QUALIFIER) + self.assertEqual(cell.qualifier, TEST_QUALIFIER) self.assertEqual(cell.timestamp_micros, TEST_TIMESTAMP) self.assertEqual(cell.labels, TEST_LABELS) @@ -586,8 +583,8 @@ def test___repr__(self): cell = self._make_one() expected = ( - "Cell(value=b'1234', row=b'row', " - + "family='cf1', column_qualifier=b'col', " + "Cell(value=b'1234', row_key=b'row', " + + "family='cf1', qualifier=b'col', " + f"timestamp_micros={TEST_TIMESTAMP}, labels=['label1', 'label2'])" ) self.assertEqual(repr(cell), expected) @@ -607,8 +604,8 @@ def test___repr___no_labels(self): None, ) expected = ( - "Cell(value=b'1234', row=b'row', " - + "family='cf1', column_qualifier=b'col', " + "Cell(value=b'1234', row_key=b'row', " + + "family='cf1', qualifier=b'col', " + f"timestamp_micros={TEST_TIMESTAMP}, labels=[])" ) self.assertEqual(repr(cell_no_labels), expected) From 12a8879a677c1c8d688ed082f20855b912ecacac Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 12 May 2023 14:49:01 -0700 Subject: [PATCH 339/349] fixed lint issues --- google/cloud/bigtable/_read_rows.py | 16 ++++++++++++---- google/cloud/bigtable/client.py | 4 +++- google/cloud/bigtable/iterators.py | 6 ++---- google/cloud/bigtable/read_modify_write_rules.py | 12 +++++------- google/cloud/bigtable/row.py | 4 ++-- 
tests/unit/test__read_rows.py | 10 +++------- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 98afd505d..7cb4ed973 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -476,7 +476,9 @@ class _State: """ @staticmethod - def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> "_State": + def handle_chunk( + owner: _StateMachine, chunk: ReadRowsResponse.CellChunk + ) -> Type["_State"]: raise NotImplementedError @@ -489,7 +491,9 @@ class AWAITING_NEW_ROW(_State): """ @staticmethod - def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: + def handle_chunk( + owner: _StateMachine, chunk: ReadRowsResponse.CellChunk + ) -> Type["_State"]: if not chunk.row_key: raise InvalidChunk("New row is missing a row key") owner.adapter.start_row(chunk.row_key) @@ -508,7 +512,9 @@ class AWAITING_NEW_CELL(_State): """ @staticmethod - def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: + def handle_chunk( + owner: _StateMachine, chunk: ReadRowsResponse.CellChunk + ) -> Type["_State"]: is_split = chunk.value_size > 0 # track latest cell data. New chunks won't send repeated data has_family = _chunk_has_field(chunk, "family_name") @@ -558,7 +564,9 @@ class AWAITING_CELL_VALUE(_State): """ @staticmethod - def handle_chunk(owner:_StateMachine, chunk: ReadRowsResponse.CellChunk) -> Type["_State"]: + def handle_chunk( + owner: _StateMachine, chunk: ReadRowsResponse.CellChunk + ) -> Type["_State"]: # ensure reset chunk matches expectations if chunk.row_key: raise InvalidChunk("In progress cell had a row key") diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 275bfa4a3..bc4cdf476 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -448,7 +448,9 @@ async def read_rows_stream( if per_request_timeout is not None and per_request_timeout <= 0: raise ValueError("per_request_timeout must be greater than 0") if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must not be greater than operation_timeout") + raise ValueError( + "per_request_timeout must not be greater than operation_timeout" + ) if per_request_timeout is None: per_request_timeout = operation_timeout request = query._to_dict() if isinstance(query, ReadRowsQuery) else query diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 7d7f963e4..a470ffeee 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -14,10 +14,8 @@ # from __future__ import annotations -from typing import ( - cast, - AsyncIterable, -) +from typing import AsyncIterable + import asyncio import time import sys diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py index 839262ea2..cd6b370df 100644 --- a/google/cloud/bigtable/read_modify_write_rules.py +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -16,8 +16,6 @@ from dataclasses import dataclass -from google.cloud.bigtable.row import family_id, qualifier - class ReadModifyWriteRule: pass @@ -26,12 +24,12 @@ class ReadModifyWriteRule: @dataclass class IncrementRule(ReadModifyWriteRule): increment_amount: int - family: family_id - column_qualifier: qualifier + family: str + qualifier: bytes @dataclass class AppendValueRule(ReadModifyWriteRule): - 
append_value: bytes | str - family: family_id - column_qualifier: qualifier + append_value: bytes + family: str + qualifier: bytes diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index a7c723a1b..a5fb033e6 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -15,7 +15,7 @@ from __future__ import annotations from collections import OrderedDict -from typing import Sequence, Generator, overload, Any, Set +from typing import Sequence, Generator, overload, Any from functools import total_ordering # Type aliases used internally for readability. @@ -145,7 +145,7 @@ def __repr__(self): for family, qualifier in self.get_column_components(): cell_list = self[family, qualifier] repr_list = [cell.to_dict() for cell in cell_list] - cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") + cell_str_buffer.append(f" ('{family}', {qualifier!r}): {repr_list},") cell_str_buffer.append("}") cell_str = "\n".join(cell_str_buffer) output = f"Row(key={self.row_key!r}, cells={cell_str})" diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index 4925f1d18..668902ecb 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -38,7 +38,7 @@ def test_ctor_defaults(self): assert instance.transient_errors == [] assert instance._last_seen_row_key is None assert instance._emit_count == 0 - assert instance.operation_timeout == None + assert instance.operation_timeout is None retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows @@ -933,9 +933,7 @@ def test_finish_cell(self): row_builder.finish_cell() self.assertEqual(len(row_builder.completed_cells), 1) self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) - self.assertEqual( - row_builder.completed_cells[0].qualifier, TEST_QUALIFIER - ) + self.assertEqual(row_builder.completed_cells[0].qualifier, TEST_QUALIFIER) self.assertEqual( row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP ) @@ -950,9 +948,7 @@ def test_finish_cell(self): row_builder.finish_cell() self.assertEqual(len(row_builder.completed_cells), 2) self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) - self.assertEqual( - row_builder.completed_cells[1].qualifier, TEST_QUALIFIER - ) + self.assertEqual(row_builder.completed_cells[1].qualifier, TEST_QUALIFIER) self.assertEqual( row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP ) From ddb34ed84a5c97487396250c76c39968f2eef52b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 10:22:18 -0700 Subject: [PATCH 340/349] added implementation for row_exists --- google/cloud/bigtable/client.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index bc4cdf476..eccfc9549 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -47,6 +47,10 @@ from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable.iterators import ReadRowsIterator +from google.cloud.bigtable.row_filters import StripValueTransformerFilter +from google.cloud.bigtable.row_filters import CellsRowLimitFilter +from google.cloud.bigtable.row_filters import RowFilterChain + if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -551,7 +555,16 @@ async def row_exists( 
Returns: - a bool indicating whether the row exists """ - raise NotImplementedError + kwargs = { + "operation_timeout": operation_timeout, + "per_request_timeout": per_request_timeout, + } + strip_filter = StripValueTransformerFilter(flag=True) + limit_filter = CellsRowLimitFilter(1) + chain_filter = RowFilterChain(filters=[limit_filter, strip_filter]) + query = ReadRowsQuery(row_keys=row_key, limit=1, row_filter=chain_filter) + results = await self.read_rows(query, **kwargs) + return len(results) > 0 async def sample_keys( self, From e4a21a6c50bc263353b00f539289ef64b3e71fdc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 10:29:11 -0700 Subject: [PATCH 341/349] added implementation for read_row --- google/cloud/bigtable/client.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index eccfc9549..1f180f664 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -38,6 +38,7 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError +from google.api_core.exceptions import NotFound from google.cloud.bigtable._read_rows import _ReadRowsOperation import google.auth.credentials @@ -515,10 +516,21 @@ async def read_row( See read_rows_stream + Raises: + - google.api_core.exceptions.NotFound: if the row does not exist Returns: - the individual row requested """ - raise NotImplementedError + kwargs = { + "operation_timeout": operation_timeout, + "per_request_timeout": per_request_timeout, + } + row_key = row_key if isinstance(row_key, bytes) else row_key.encode() + query = ReadRowsQuery(row_keys=row_key, limit=1) + results = await self.read_rows(query, **kwargs) + if len(results) == 0: + raise NotFound(f"Row {row_key!r} not found") + return results[0] async def read_rows_sharded( self, From ea18255c58f32dcf08e33e1488da4b5281cd1ecc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 11:03:54 -0700 Subject: [PATCH 342/349] added unit tests --- google/cloud/bigtable/read_rows_query.py | 6 +- tests/unit/test_client.py | 99 ++++++++++++++++++++++++ tests/unit/test_read_rows_query.py | 2 +- 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index e26f99d34..4cd4e9eb8 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -221,7 +221,11 @@ def _to_dict(self) -> dict[str, Any]: row_ranges.append(dict_range) row_keys = list(self.row_keys) row_keys.sort() - row_set = {"row_keys": row_keys, "row_ranges": row_ranges} + row_set = {} + if row_keys: + row_set["row_keys"] = row_keys + if row_ranges: + row_set["row_ranges"] = row_ranges final_dict: dict[str, Any] = { "rows": row_set, } diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 835fa1595..ca0ce144c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1333,3 +1333,102 @@ async def test_read_rows_default_timeout_override(self): kwargs = mock_op.call_args_list[0].kwargs assert kwargs["operation_timeout"] == operation_timeout assert kwargs["per_request_timeout"] == per_request_timeout + + @pytest.mark.asyncio + async def test_read_row(self): + """Test reading a single row""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_key = b"test_1" + with mock.patch.object(table, "read_rows") as read_rows: + expected_result = 
object() + read_rows.side_effect = lambda *args, **kwargs: [expected_result] + expected_op_timeout = 8 + expected_req_timeout = 4 + row = await table.read_row( + row_key, + operation_timeout=expected_op_timeout, + per_request_timeout=expected_req_timeout, + ) + assert row == expected_result + assert read_rows.call_count == 1 + args, kwargs = read_rows.call_args_list[0] + assert kwargs["operation_timeout"] == expected_op_timeout + assert kwargs["per_request_timeout"] == expected_req_timeout + assert len(args) == 1 + assert isinstance(args[0], ReadRowsQuery) + assert args[0]._to_dict() == { + "rows": {"row_keys": [row_key]}, + "rows_limit": 1, + } + + @pytest.mark.asyncio + async def test_read_row_no_response(self): + """should raise NotFound if row does not exist""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_key = b"test_1" + with mock.patch.object(table, "read_rows") as read_rows: + # return no rows + read_rows.side_effect = lambda *args, **kwargs: [] + expected_op_timeout = 8 + expected_req_timeout = 4 + with pytest.raises(core_exceptions.NotFound): + await table.read_row( + row_key, + operation_timeout=expected_op_timeout, + per_request_timeout=expected_req_timeout, + ) + assert read_rows.call_count == 1 + args, kwargs = read_rows.call_args_list[0] + assert kwargs["operation_timeout"] == expected_op_timeout + assert kwargs["per_request_timeout"] == expected_req_timeout + assert isinstance(args[0], ReadRowsQuery) + assert args[0]._to_dict() == { + "rows": {"row_keys": [row_key]}, + "rows_limit": 1, + } + + @pytest.mark.parametrize( + "return_value,expected_result", + [ + ([], False), + ([object()], True), + ([object(), object()], True), + ], + ) + @pytest.mark.asyncio + async def test_row_exists(self, return_value, expected_result): + """Test checking for row existence""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_key = b"test_1" + with mock.patch.object(table, "read_rows") as read_rows: + # return no rows + read_rows.side_effect = lambda *args, **kwargs: return_value + expected_op_timeout = 1 + expected_req_timeout = 2 + result = await table.row_exists( + row_key, + operation_timeout=expected_op_timeout, + per_request_timeout=expected_req_timeout, + ) + assert expected_result == result + assert read_rows.call_count == 1 + args, kwargs = read_rows.call_args_list[0] + assert kwargs["operation_timeout"] == expected_op_timeout + assert kwargs["per_request_timeout"] == expected_req_timeout + assert isinstance(args[0], ReadRowsQuery) + expected_filter = { + "chain": { + "filters": [ + {"cells_per_row_limit_filter": 1}, + {"strip_value_transformer": True}, + ] + } + } + assert args[0]._to_dict() == { + "rows": {"row_keys": [row_key]}, + "rows_limit": 1, + "filter": expected_filter, + } diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index aa690bc86..cbc87bb7b 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -300,7 +300,7 @@ def test_to_dict_rows_default(self): output = query._to_dict() self.assertTrue(isinstance(output, dict)) self.assertEqual(len(output.keys()), 1) - expected = {"rows": {"row_keys": [], "row_ranges": []}} + expected = {"rows": {}} self.assertEqual(output, expected) request_proto = ReadRowsRequest(**output) From a741762bca7038d6a2f4fc82fe93e5312a0b9b22 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 11:40:18 -0700 Subject: [PATCH 343/349] fixed bug in query --- 
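Note on this fix: ReadRowsQuery.__init__ previously gated its inputs with plain
truthiness checks ("if row_keys:" / "if row_ranges:"), so valid-but-falsy values
such as an empty-string row key were silently dropped instead of being added to
the row set. This patch switches the constructor to explicit "is not None" checks
(and annotates row_set for mypy). The snippet below is an illustrative sketch of
the difference only; it is not part of the patch:

    # Illustrative only: why a truthiness check drops a legitimate empty input.
    row_keys = ""                 # caller explicitly requests the empty row key
    if row_keys:                  # old check: "" is falsy, so the key was skipped
        print("key added")        # never runs
    if row_keys is not None:      # new check: only skips when the argument is omitted
        print("key added")        # runs; the key is kept (encoded as b"")

With the new check, ReadRowsQuery(row_keys="") keeps the key as b"", which is what
the test_empty_row_set unit test added in this patch asserts.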
google/cloud/bigtable/read_rows_query.py | 6 +++--- tests/unit/test_read_rows_query.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 4cd4e9eb8..4a28206e5 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -106,12 +106,12 @@ def __init__( """ self.row_keys: set[bytes] = set() self.row_ranges: list[RowRange | dict[str, bytes]] = [] - if row_ranges: + if row_ranges is not None: if isinstance(row_ranges, RowRange): row_ranges = [row_ranges] for r in row_ranges: self.add_range(r) - if row_keys: + if row_keys is not None: if not isinstance(row_keys, list): row_keys = [row_keys] for k in row_keys: @@ -221,7 +221,7 @@ def _to_dict(self) -> dict[str, Any]: row_ranges.append(dict_range) row_keys = list(self.row_keys) row_keys.sort() - row_set = {} + row_set : dict[str, Any] = {} if row_keys: row_set["row_keys"] = row_keys if row_ranges: diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/test_read_rows_query.py index cbc87bb7b..f630f2eab 100644 --- a/tests/unit/test_read_rows_query.py +++ b/tests/unit/test_read_rows_query.py @@ -355,5 +355,10 @@ def test_to_dict_rows_populated(self): filter_proto = request_proto.filter self.assertEqual(filter_proto, row_filter._to_pb()) + def test_empty_row_set(self): + """Empty strings should be treated as keys inputs""" + query = self._make_one(row_keys="") + self.assertEqual(query.row_keys, {b""}) + def test_shard(self): pass From c1895c8dcf7127f6e455c469de9d3cafad2361a4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 11:40:33 -0700 Subject: [PATCH 344/349] added system tests --- tests/system/test_system.py | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 543a14725..abf1026cc 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -156,7 +156,7 @@ def __init__(self, table): self.table = table async def add_row( - self, row_key, family=TEST_FAMILY, qualifier=b"q", value=b"test-value" + self, row_key, *, family=TEST_FAMILY, qualifier=b"q", value=b"test-value" ): request = { "table_name": self.table.table_name, @@ -310,3 +310,48 @@ async def test_read_rows_stream_inactive_timer(table, temp_rows): await generator.__anext__() assert "inactivity" in str(e) assert "idle_timeout=0.1" in str(e) + +@pytest.mark.asyncio +async def test_read_row(table, temp_rows): + """ + Test read_row (single row helper) + """ + from google.cloud.bigtable import Row + await temp_rows.add_row(b"row_key_1", value=b"value") + row = await table.read_row(b"row_key_1") + assert isinstance(row, Row) + assert row.row_key == b"row_key_1" + assert row.cells[0].value == b"value" + +@pytest.mark.asyncio +async def test_read_row_missing(table): + """ + Test read_row when row does not exist + """ + from google.api_core import exceptions + row_key = "row_key_not_exist" + with pytest.raises(exceptions.NotFound) as e: + await table.read_row(row_key) + assert str(e) == f"Row b'{row_key}' not found" + with pytest.raises(exceptions.InvalidArgument) as e: + await table.read_row("") + assert "Row kest must be non-empty" in str(e) + +@pytest.mark.asyncio +async def test_row_exists(table, temp_rows): + from google.api_core import exceptions + """Test row_exists with rows that exist and don't exist""" + assert await table.row_exists(b"row_key_1") is False + await 
temp_rows.add_row(b"row_key_1") + assert await table.row_exists(b"row_key_1") is True + assert await table.row_exists("row_key_1") is True + assert await table.row_exists(b"row_key_2") is False + assert await table.row_exists("row_key_2") is False + assert await table.row_exists("3") is False + await temp_rows.add_row(b"3") + assert await table.row_exists(b"3") is True + with pytest.raises(exceptions.InvalidArgument) as e: + await table.row_exists("") + assert "Row kest must be non-empty" in str(e) + + From 6ca9027b61932e4152bd6f1636a60454b1eee5eb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 11:49:58 -0700 Subject: [PATCH 345/349] raise type error if row_key is not str or bytes --- google/cloud/bigtable/client.py | 4 ++++ tests/unit/test_client.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1f180f664..fc8491aaa 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -521,6 +521,8 @@ async def read_row( Returns: - the individual row requested """ + if not isinstance(row_key, bytes) and not isinstance(row_key, str): + raise TypeError("row_key must be bytes or string") kwargs = { "operation_timeout": operation_timeout, "per_request_timeout": per_request_timeout, @@ -567,6 +569,8 @@ async def row_exists( Returns: - a bool indicating whether the row exists """ + if not isinstance(row_key, bytes) and not isinstance(row_key, str): + raise TypeError("row_key must be bytes or string") kwargs = { "operation_timeout": operation_timeout, "per_request_timeout": per_request_timeout, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca0ce144c..b53df2100 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1389,6 +1389,16 @@ async def test_read_row_no_response(self): "rows_limit": 1, } + @pytest.mark.parametrize("input_row", [None, 5, object()]) + @pytest.mark.asyncio + async def test_read_row_w_invalid_input(self, input_row): + """Should raise error when passed None""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + with pytest.raises(TypeError) as e: + await table.read_row(input_row) + assert "row_key must be bytes or string" in e + @pytest.mark.parametrize( "return_value,expected_result", [ @@ -1432,3 +1442,14 @@ async def test_row_exists(self, return_value, expected_result): "rows_limit": 1, "filter": expected_filter, } + + @pytest.mark.parametrize("input_row", [None, 5, object()]) + @pytest.mark.asyncio + async def test_row_exists_w_invalid_input(self, input_row): + """Should raise error when passed None""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + with pytest.raises(TypeError) as e: + await table.row_exists(input_row) + assert "row_key must be bytes or string" in e + From dc7a932aaaeeae1af07758727a7f51fd388be0a9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 18 May 2023 11:57:39 -0700 Subject: [PATCH 346/349] added custom exception for RowNotFound --- google/cloud/bigtable/client.py | 6 +++--- google/cloud/bigtable/exceptions.py | 4 ++++ google/cloud/bigtable/read_rows_query.py | 2 +- tests/system/test_system.py | 11 ++++++++--- tests/unit/test_client.py | 7 ++++--- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index fc8491aaa..330ff59fe 100644 --- a/google/cloud/bigtable/client.py +++ 
b/google/cloud/bigtable/client.py @@ -38,7 +38,7 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError -from google.api_core.exceptions import NotFound +from google.cloud.bigtable.exceptions import RowNotFound from google.cloud.bigtable._read_rows import _ReadRowsOperation import google.auth.credentials @@ -517,7 +517,7 @@ async def read_row( See read_rows_stream Raises: - - google.api_core.exceptions.NotFound: if the row does not exist + - google.cloud.bigtable.exceptions.RowNotFound: if the row does not exist Returns: - the individual row requested """ @@ -531,7 +531,7 @@ async def read_row( query = ReadRowsQuery(row_keys=row_key, limit=1) results = await self.read_rows(query, **kwargs) if len(results) == 0: - raise NotFound(f"Row {row_key!r} not found") + raise RowNotFound(f"Row {row_key!r} not found") return results[0] async def read_rows_sharded( diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 5c5e50ef3..058b98962 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -71,6 +71,10 @@ class InvalidChunk(core_exceptions.GoogleAPICallError): """Exception raised to invalid chunk data from back-end.""" +class RowNotFound(core_exceptions.NotFound): + """Exception raised when a row is not found on a read_row call.""" + + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 4a28206e5..6de84e918 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -221,7 +221,7 @@ def _to_dict(self) -> dict[str, Any]: row_ranges.append(dict_range) row_keys = list(self.row_keys) row_keys.sort() - row_set : dict[str, Any] = {} + row_set: dict[str, Any] = {} if row_keys: row_set["row_keys"] = row_keys if row_ranges: diff --git a/tests/system/test_system.py b/tests/system/test_system.py index abf1026cc..ff4104626 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -311,35 +311,42 @@ async def test_read_rows_stream_inactive_timer(table, temp_rows): assert "inactivity" in str(e) assert "idle_timeout=0.1" in str(e) + @pytest.mark.asyncio async def test_read_row(table, temp_rows): """ Test read_row (single row helper) """ from google.cloud.bigtable import Row + await temp_rows.add_row(b"row_key_1", value=b"value") row = await table.read_row(b"row_key_1") assert isinstance(row, Row) assert row.row_key == b"row_key_1" assert row.cells[0].value == b"value" + @pytest.mark.asyncio async def test_read_row_missing(table): """ Test read_row when row does not exist """ from google.api_core import exceptions + from google.cloud.bigtable.exceptions import RowNotFound + row_key = "row_key_not_exist" - with pytest.raises(exceptions.NotFound) as e: + with pytest.raises(RowNotFound) as e: await table.read_row(row_key) assert str(e) == f"Row b'{row_key}' not found" with pytest.raises(exceptions.InvalidArgument) as e: await table.read_row("") assert "Row kest must be non-empty" in str(e) + @pytest.mark.asyncio async def test_row_exists(table, temp_rows): from google.api_core import exceptions + """Test row_exists with rows that exist and don't exist""" assert await table.row_exists(b"row_key_1") is False await temp_rows.add_row(b"row_key_1") @@ -353,5 +360,3 @@ async def 
test_row_exists(table, temp_rows): with pytest.raises(exceptions.InvalidArgument) as e: await table.row_exists("") assert "Row kest must be non-empty" in str(e) - - diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b53df2100..3dd07cc91 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1364,7 +1364,9 @@ async def test_read_row(self): @pytest.mark.asyncio async def test_read_row_no_response(self): - """should raise NotFound if row does not exist""" + """should raise RowNotFound if row does not exist""" + from google.cloud.bigtable.exceptions import RowNotFound + async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" @@ -1373,7 +1375,7 @@ async def test_read_row_no_response(self): read_rows.side_effect = lambda *args, **kwargs: [] expected_op_timeout = 8 expected_req_timeout = 4 - with pytest.raises(core_exceptions.NotFound): + with pytest.raises(RowNotFound): await table.read_row( row_key, operation_timeout=expected_op_timeout, @@ -1452,4 +1454,3 @@ async def test_row_exists_w_invalid_input(self, input_row): with pytest.raises(TypeError) as e: await table.row_exists(input_row) assert "row_key must be bytes or string" in e - From 7df031191d79ca3575a12164c760540ee9f2cdb2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 09:50:04 -0700 Subject: [PATCH 347/349] clean-up --- google/cloud/bigtable/client.py | 29 ++++++++++++++--------------- tests/unit/test_client.py | 8 ++++---- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 063b92c93..f98438cf5 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -511,15 +511,14 @@ async def read_row( Returns: - the individual row requested """ - if not isinstance(row_key, bytes) and not isinstance(row_key, str): - raise TypeError("row_key must be bytes or string") - kwargs = { - "operation_timeout": operation_timeout, - "per_request_timeout": per_request_timeout, - } - row_key = row_key if isinstance(row_key, bytes) else row_key.encode() + if row_key is None: + raise ValueError("row_key must be string or bytes") query = ReadRowsQuery(row_keys=row_key, limit=1) - results = await self.read_rows(query, **kwargs) + results = await self.read_rows( + query, + operation_timeout=operation_timeout, + per_request_timeout=per_request_timeout, + ) if len(results) == 0: raise RowNotFound(f"Row {row_key!r} not found") return results[0] @@ -558,17 +557,17 @@ async def row_exists( Returns: - a bool indicating whether the row exists """ - if not isinstance(row_key, bytes) and not isinstance(row_key, str): - raise TypeError("row_key must be bytes or string") - kwargs = { - "operation_timeout": operation_timeout, - "per_request_timeout": per_request_timeout, - } + if row_key is None: + raise ValueError("row_key must be string or bytes") strip_filter = StripValueTransformerFilter(flag=True) limit_filter = CellsRowLimitFilter(1) chain_filter = RowFilterChain(filters=[limit_filter, strip_filter]) query = ReadRowsQuery(row_keys=row_key, limit=1, row_filter=chain_filter) - results = await self.read_rows(query, **kwargs) + results = await self.read_rows( + query, + operation_timeout=operation_timeout, + per_request_timeout=per_request_timeout, + ) return len(results) > 0 async def sample_keys( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b60c77e05..d7adde30a 100644 --- a/tests/unit/test_client.py +++ 
b/tests/unit/test_client.py @@ -1358,9 +1358,9 @@ async def test_read_row_w_invalid_input(self, input_row): """Should raise error when passed None""" async with self._make_client() as client: table = client.get_table("instance", "table") - with pytest.raises(TypeError) as e: + with pytest.raises(ValueError) as e: await table.read_row(input_row) - assert "row_key must be bytes or string" in e + assert "must be string or bytes" in e @pytest.mark.parametrize( "return_value,expected_result", @@ -1412,6 +1412,6 @@ async def test_row_exists_w_invalid_input(self, input_row): """Should raise error when passed None""" async with self._make_client() as client: table = client.get_table("instance", "table") - with pytest.raises(TypeError) as e: + with pytest.raises(ValueError) as e: await table.row_exists(input_row) - assert "row_key must be bytes or string" in e + assert "must be string or butes" in e From 51c2cd0395d90d67de53b8bb1f743e8f11af2a45 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 18:56:53 -0700 Subject: [PATCH 348/349] ran black --- tests/system/test_system.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 7391fdf95..0121244d3 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -446,6 +446,7 @@ async def test_row_exists(table, temp_rows): await table.row_exists("") assert "Row kest must be non-empty" in str(e) + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.parametrize( "cell_value,filter_input,expect_match", From 1351e531df667d2c1c71ae19d15e3d793d099dc9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 13:49:19 -0700 Subject: [PATCH 349/349] address PR comments --- google/cloud/bigtable/client.py | 10 +++--- google/cloud/bigtable/exceptions.py | 4 --- tests/system/test_system.py | 55 ++++++++++++++++++++++++----- tests/unit/test_client.py | 49 ++++++++++++++++++++----- 4 files changed, 92 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b3c52845d..0544bcb78 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -40,7 +40,6 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError -from google.cloud.bigtable.exceptions import RowNotFound from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions from google.cloud.bigtable._read_rows import _ReadRowsOperation @@ -505,9 +504,10 @@ async def read_row( self, row_key: str | bytes, *, + row_filter: RowFilter | None = None, operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, - ) -> Row: + ) -> Row | None: """ Helper function to return a single row @@ -516,18 +516,18 @@ async def read_row( Raises: - google.cloud.bigtable.exceptions.RowNotFound: if the row does not exist Returns: - - the individual row requested + - the individual row requested, or None if it does not exist """ if row_key is None: raise ValueError("row_key must be string or bytes") - query = ReadRowsQuery(row_keys=row_key, limit=1) + query = ReadRowsQuery(row_keys=row_key, row_filter=row_filter, limit=1) results = await self.read_rows( query, operation_timeout=operation_timeout, per_request_timeout=per_request_timeout, ) if len(results) == 0: - raise RowNotFound(f"Row {row_key!r} not found") + return None return results[0] async def read_rows_sharded( diff --git 
a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 2dfaabfa2..fe3bec7e9 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -39,10 +39,6 @@ class InvalidChunk(core_exceptions.GoogleAPICallError): """Exception raised to invalid chunk data from back-end.""" -class RowNotFound(core_exceptions.NotFound): - """Exception raised when a row is not found on a read_row call.""" - - class _RowSetComplete(Exception): """ Internal exception for _ReadRowsOperation diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 0121244d3..f6730576d 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -339,9 +339,9 @@ async def test_read_rows_range_query(table, temp_rows): @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio -async def test_read_rows_key_query(table, temp_rows): +async def test_read_rows_single_key_query(table, temp_rows): """ - Ensure that the read_rows method works + Ensure that the read_rows method works with specified query """ from google.cloud.bigtable import ReadRowsQuery @@ -349,7 +349,7 @@ async def test_read_rows_key_query(table, temp_rows): await temp_rows.add_row(b"b") await temp_rows.add_row(b"c") await temp_rows.add_row(b"d") - # full table scan + # retrieve specific keys query = ReadRowsQuery(row_keys=[b"a", b"c"]) row_list = await table.read_rows(query) assert len(row_list) == 2 @@ -357,6 +357,29 @@ async def test_read_rows_key_query(table, temp_rows): assert row_list[1].row_key == b"c" +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_read_rows_with_filter(table, temp_rows): + """ + ensure filters are applied + """ + from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable.row_filters import ApplyLabelFilter + + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + await temp_rows.add_row(b"c") + await temp_rows.add_row(b"d") + # retrieve keys with filter + expected_label = "test-label" + row_filter = ApplyLabelFilter(expected_label) + query = ReadRowsQuery(row_filter=row_filter) + row_list = await table.read_rows(query) + assert len(row_list) == 4 + for row in row_list: + assert row[0].labels == [expected_label] + + @pytest.mark.asyncio async def test_read_rows_stream_close(table, temp_rows): """ @@ -417,15 +440,31 @@ async def test_read_row_missing(table): Test read_row when row does not exist """ from google.api_core import exceptions - from google.cloud.bigtable.exceptions import RowNotFound row_key = "row_key_not_exist" - with pytest.raises(RowNotFound) as e: - await table.read_row(row_key) - assert str(e) == f"Row b'{row_key}' not found" + result = await table.read_row(row_key) + assert result is None with pytest.raises(exceptions.InvalidArgument) as e: await table.read_row("") - assert "Row kest must be non-empty" in str(e) + assert "Row key must be non-empty" in str(e) + + +@pytest.mark.asyncio +async def test_read_row_w_filter(table, temp_rows): + """ + Test read_row (single row helper) + """ + from google.cloud.bigtable import Row + from google.cloud.bigtable.row_filters import ApplyLabelFilter + + await temp_rows.add_row(b"row_key_1", value=b"value") + expected_label = "test-label" + label_filter = ApplyLabelFilter(expected_label) + row = await table.read_row(b"row_key_1", row_filter=label_filter) + assert isinstance(row, Row) + assert row.row_key == b"row_key_1" + assert row.cells[0].value == b"value" 
+ assert row.cells[0].labels == [expected_label] @pytest.mark.asyncio diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 80b377f2a..14da80dae 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1325,10 +1325,41 @@ async def test_read_row(self): } @pytest.mark.asyncio - async def test_read_row_no_response(self): - """should raise RowNotFound if row does not exist""" - from google.cloud.bigtable.exceptions import RowNotFound + async def test_read_row_w_filter(self): + """Test reading a single row with an added filter""" + async with self._make_client() as client: + table = client.get_table("instance", "table") + row_key = b"test_1" + with mock.patch.object(table, "read_rows") as read_rows: + expected_result = object() + read_rows.side_effect = lambda *args, **kwargs: [expected_result] + expected_op_timeout = 8 + expected_req_timeout = 4 + mock_filter = mock.Mock() + expected_filter = {"filter": "mock filter"} + mock_filter._to_dict.return_value = expected_filter + row = await table.read_row( + row_key, + operation_timeout=expected_op_timeout, + per_request_timeout=expected_req_timeout, + row_filter=expected_filter, + ) + assert row == expected_result + assert read_rows.call_count == 1 + args, kwargs = read_rows.call_args_list[0] + assert kwargs["operation_timeout"] == expected_op_timeout + assert kwargs["per_request_timeout"] == expected_req_timeout + assert len(args) == 1 + assert isinstance(args[0], ReadRowsQuery) + assert args[0]._to_dict() == { + "rows": {"row_keys": [row_key]}, + "rows_limit": 1, + "filter": expected_filter, + } + @pytest.mark.asyncio + async def test_read_row_no_response(self): + """should return None if row does not exist""" async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" @@ -1337,12 +1368,12 @@ async def test_read_row_no_response(self): read_rows.side_effect = lambda *args, **kwargs: [] expected_op_timeout = 8 expected_req_timeout = 4 - with pytest.raises(RowNotFound): - await table.read_row( - row_key, - operation_timeout=expected_op_timeout, - per_request_timeout=expected_req_timeout, - ) + result = await table.read_row( + row_key, + operation_timeout=expected_op_timeout, + per_request_timeout=expected_req_timeout, + ) + assert result is None assert read_rows.call_count == 1 args, kwargs = read_rows.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout