diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py index 3dee2e1092e0..5acd74320b5f 100644 --- a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py @@ -144,9 +144,10 @@ def __init__(self, ) if client_info is None: - client_info = ( - google.api_core.gapic_v1.client_info.DEFAULT_CLIENT_INFO) - client_info.gapic_version = _GAPIC_LIBRARY_VERSION + client_info = google.api_core.gapic_v1.client_info.ClientInfo( + gapic_version=_GAPIC_LIBRARY_VERSION, ) + else: + client_info.gapic_version = _GAPIC_LIBRARY_VERSION self._client_info = client_info # Parse out the default settings for retry and timeout for each RPC @@ -165,9 +166,9 @@ def __init__(self, # Service calls def create_read_session(self, table_reference, - requested_streams, - parent=None, + parent, table_modifiers=None, + requested_streams=None, read_options=None, format_=None, retry=google.api_core.gapic_v1.method.DEFAULT, @@ -192,18 +193,26 @@ def create_read_session(self, >>> >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() >>> - >>> # TODO: Initialize ``table_reference``: + >>> # TODO: Initialize `table_reference`: >>> table_reference = {} >>> - >>> # TODO: Initialize ``requested_streams``: - >>> requested_streams = 0 + >>> # TODO: Initialize `parent`: + >>> parent = '' >>> - >>> response = client.create_read_session(table_reference, requested_streams) + >>> response = client.create_read_session(table_reference, parent) Args: table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required. Reference to the table to read. + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference` + parent (str): Required. String of the form "projects/your-project-id" indicating the + project this ReadSession is associated with. This is the project that will + be billed for usage. + table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Optional. Any modifiers to the Table (e.g. snapshot timestamp). + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers` requested_streams (int): Optional. Initial number of streams. If unset or 0, we will provide a value of streams so as to produce reasonable throughput. Must be non-negative. The number of streams may be lower than the requested number, @@ -211,12 +220,8 @@ def create_read_session(self, the maximum amount of parallelism allowed by the system. Streams must be read starting from offset 0. - parent (str): Required. Project which this ReadSession is associated with. This is the - project that will be billed for usage. - table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Optional. Any modifiers to the Table (e.g. snapshot timestamp). - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers` read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Optional. Read options for this session (e.g. column selection, filters). + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data output format. Currently default to Avro. @@ -253,9 +258,9 @@ def create_read_session(self, request = storage_pb2.CreateReadSessionRequest( table_reference=table_reference, - requested_streams=requested_streams, parent=parent, table_modifiers=table_modifiers, + requested_streams=requested_streams, read_options=read_options, format=format_, ) @@ -298,7 +303,7 @@ def read_rows(self, >>> >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() >>> - >>> # TODO: Initialize ``read_position``: + >>> # TODO: Initialize `read_position`: >>> read_position = {} >>> >>> for element in client.read_rows(read_position): @@ -309,6 +314,7 @@ def read_rows(self, read_position (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition]): Required. Identifier of the position in the stream to start reading from. The offset requested must be less than the last row read from ReadRows. Requesting a larger offset is undefined. + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` retry (Optional[google.api_core.retry.Retry]): A retry object used @@ -374,10 +380,10 @@ def batch_create_read_session_streams( >>> >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() >>> - >>> # TODO: Initialize ``session``: + >>> # TODO: Initialize `session`: >>> session = {} >>> - >>> # TODO: Initialize ``requested_streams``: + >>> # TODO: Initialize `requested_streams`: >>> requested_streams = 0 >>> >>> response = client.batch_create_read_session_streams(session, requested_streams) @@ -385,6 +391,7 @@ def batch_create_read_session_streams( Args: session (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.ReadSession]): Required. Must be a non-expired session obtained from a call to CreateReadSession. Only the name field needs to be set. + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` requested_streams (int): Required. Number of new streams requested. Must be positive. @@ -466,13 +473,14 @@ def finalize_stream(self, >>> >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() >>> - >>> # TODO: Initialize ``stream``: + >>> # TODO: Initialize `stream`: >>> stream = {} >>> >>> client.finalize_stream(stream) Args: stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to finalize. + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` retry (Optional[google.api_core.retry.Retry]): A retry object used @@ -524,11 +532,11 @@ def split_read_stream(self, timeout=google.api_core.gapic_v1.method.DEFAULT, metadata=None): """ - Splits a given read stream into two Streams. These streams are referred to - as the primary and the residual of the split. The original stream can still - be read from in the same manner as before. Both of the returned streams can - also be read from, and the total rows return by both child streams will be - the same as the rows read from the original stream. + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. Moreover, the two child streams will be allocated back to back in the original Stream. Concretely, it is guaranteed that for streams Original, @@ -543,13 +551,14 @@ def split_read_stream(self, >>> >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() >>> - >>> # TODO: Initialize ``original_stream``: + >>> # TODO: Initialize `original_stream`: >>> original_stream = {} >>> >>> response = client.split_read_stream(original_stream) Args: original_stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to split. + If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` retry (Optional[google.api_core.retry.Retry]): A retry object used diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py index 47489b77d468..fc6e52d2e6fa 100644 --- a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py @@ -23,7 +23,7 @@ class DataFormat(enum.IntEnum): Data format for input or output data. Attributes: - DATA_FORMAT_UNSPECIFIED (int) + DATA_FORMAT_UNSPECIFIED (int): Data format is unspecified. AVRO (int): Avro is a standard open source row based file format. See https://avro.apache.org/ for more details. """ diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py index c04464cc5c96..e5d4483b157e 100644 --- a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py @@ -186,11 +186,11 @@ def finalize_stream(self): def split_read_stream(self): """Return the gRPC stub for {$apiMethod.name}. - Splits a given read stream into two Streams. These streams are referred to - as the primary and the residual of the split. The original stream can still - be read from in the same manner as before. Both of the returned streams can - also be read from, and the total rows return by both child streams will be - the same as the rows read from the original stream. + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. Moreover, the two child streams will be allocated back to back in the original Stream. Concretely, it is guaranteed that for streams Original, diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py index 823a7e78338b..4a11ee41c446 100644 --- a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py @@ -623,7 +623,10 @@ DESCRIPTOR = _STREAM, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Information about a single data stream within a read session. + + + Attributes: name: Name of the stream. In the form ``/projects/{project_id}/stream/{stream_id}`` @@ -638,7 +641,12 @@ DESCRIPTOR = _STREAMPOSITION, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Expresses a point within a given stream using an offset position. + + + Attributes: + stream: + Identifier for a given Stream. offset: Position in the stream. """, @@ -650,7 +658,10 @@ DESCRIPTOR = _READSESSION, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Information returned from a ``CreateReadSession`` request. + + + Attributes: name: Unique identifier for the session. In the form ``projects/{project_id}/sessions/{session_id}`` @@ -679,11 +690,16 @@ DESCRIPTOR = _CREATEREADSESSIONREQUEST, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Creates a new read session, which may include additional options such as + requested parallelism, projection filters and constraints. + + + Attributes: table_reference: Required. Reference to the table to read. parent: - Required. Project which this ReadSession is associated with. + Required. String of the form "projects/your-project-id" + indicating the project this ReadSession is associated with. This is the project that will be billed for usage. table_modifiers: Optional. Any modifiers to the Table (e.g. snapshot @@ -710,7 +726,11 @@ DESCRIPTOR = _READROWSREQUEST, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Requesting row data via ``ReadRows`` must provide Stream position + information. + + + Attributes: read_position: Required. Identifier of the position in the stream to start reading from. The offset requested must be less than the last @@ -725,7 +745,10 @@ DESCRIPTOR = _STREAMSTATUS, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Progress information for a given Stream. + + + Attributes: estimated_row_count: Number of estimated rows in the current stream. May change over time as different readers in the stream progress at rates @@ -755,7 +778,14 @@ DESCRIPTOR = _READROWSRESPONSE, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Response from calling ``ReadRows`` may include row data, progress and + throttling information. + + + Attributes: + rows: + Row data is returned in format specified during session + creation. avro_rows: Serialized row data in AVRO format. status: @@ -772,7 +802,11 @@ DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSREQUEST, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Information needed to request additional streams for an established read + session. + + + Attributes: session: Required. Must be a non-expired session obtained from a call to CreateReadSession. Only the name field needs to be set. @@ -789,7 +823,11 @@ DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSRESPONSE, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """The response from ``BatchCreateReadSessionStreams`` returns the stream + identifiers for the newly created streams. + + + Attributes: streams: Newly added streams. """, @@ -801,7 +839,10 @@ DESCRIPTOR = _FINALIZESTREAMREQUEST, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Request information for invoking ``FinalizeStream``. + + + Attributes: stream: Stream to finalize. """, @@ -813,7 +854,10 @@ DESCRIPTOR = _SPLITREADSTREAMREQUEST, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Request information for ``SplitReadStream``. + + + Attributes: original_stream: Stream to split. """, @@ -825,7 +869,10 @@ DESCRIPTOR = _SPLITREADSTREAMRESPONSE, __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' , - __doc__ = """Attributes: + __doc__ = """Response from ``SplitReadStream``. + + + Attributes: primary_stream: Primary stream. Will contain the beginning portion of \|original\_stream\|. diff --git a/bigquery_storage/tests/system/test_system.py b/bigquery_storage/tests/system/test_system.py index a9c4f691663a..99e7dd56e78d 100644 --- a/bigquery_storage/tests/system/test_system.py +++ b/bigquery_storage/tests/system/test_system.py @@ -44,8 +44,8 @@ def table_reference(): def test_read_rows(client, project_id, table_reference): session = client.create_read_session( table_reference, + 'projects/{}'.format(project_id), requested_streams=1, - parent='projects/{}'.format(project_id), ) stream_pos = bigquery_storage_v1beta1.types.StreamPosition( @@ -60,7 +60,6 @@ def test_read_rows(client, project_id, table_reference): def test_split_read_stream(client, project_id, table_reference): session = client.create_read_session( table_reference, - requested_streams=1, parent='projects/{}'.format(project_id), ) diff --git a/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py index ec9a3527b87f..699517f480c0 100644 --- a/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py +++ b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py @@ -82,16 +82,14 @@ def test_create_read_session(self): # Setup Request table_reference = {} - requested_streams = 1017221410 + parent = 'parent-995424086' - response = client.create_read_session(table_reference, - requested_streams) + response = client.create_read_session(table_reference, parent) assert expected_response == response assert len(channel.requests) == 1 expected_request = storage_pb2.CreateReadSessionRequest( - table_reference=table_reference, - requested_streams=requested_streams) + table_reference=table_reference, parent=parent) actual_request = channel.requests[0][1] assert expected_request == actual_request @@ -103,10 +101,10 @@ def test_create_read_session_exception(self): # Setup request table_reference = {} - requested_streams = 1017221410 + parent = 'parent-995424086' with pytest.raises(CustomException): - client.create_read_session(table_reference, requested_streams) + client.create_read_session(table_reference, parent) def test_read_rows(self): # Setup Expected Response diff --git a/bigquery_storage/tests/unit/test_client.py b/bigquery_storage/tests/unit/test_client.py index 5371df8621d7..e671b9a3a92a 100644 --- a/bigquery_storage/tests/unit/test_client.py +++ b/bigquery_storage/tests/unit/test_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google.api_core.gapic_v1 import client_info import mock import pytest @@ -47,7 +48,24 @@ def transport_callable(credentials=None, default_class=None): ) -def test_create_read_session_w_parent(mock_transport, client_under_test): +def test_constructor_w_client_info(mock_transport): + from google.cloud.bigquery_storage_v1beta1 import client + + def transport_callable(credentials=None, default_class=None): + return mock_transport + + client_under_test = client.BigQueryStorageClient( + transport=transport_callable, + client_info=client_info.ClientInfo( + client_library_version='test-client-version', + ), + ) + + user_agent = client_under_test._client_info.to_user_agent() + assert 'test-client-version' in user_agent + + +def test_create_read_session(mock_transport, client_under_test): table_reference = types.TableReference( project_id='data-project-id', dataset_id='dataset_id', @@ -56,10 +74,7 @@ def test_create_read_session_w_parent(mock_transport, client_under_test): client_under_test.create_read_session( table_reference, - # TODO: requested_streams should be optional. - # Can remove after CL 220489471. - requested_streams=0, - parent='projects/other-project', + 'projects/other-project', ) expected_request = types.CreateReadSessionRequest(