Skip to content

Commit

Permalink
feat(spanner): add support for Proto Columns (#1084)
Browse files Browse the repository at this point in the history
* feat: Proto Columns Feature (#909)

* feat: adding proto autogenerated code changes for proto column feature

* feat: add implementation for Proto columns DDL

* feat: add implementation for Proto columns DML

* feat: add implementation for Proto columns DQL

* feat: add NoneType check during Proto deserialization

* feat: add code changes for Proto DDL support

* feat: add required proto files to execute samples and tests

* feat: add sample snippets for Proto columns DDL

* feat: add tests for proto columns ddl, dml, dql snippets

* feat: code refactoring

* feat: remove staging endpoint from snippets.py

* feat: comment refactor

* feat: add license file

* feat: update proto column data in insertion sample

* feat: move column_info argument to the end to avoid breaking code

* feat: Proto column feature tests and samples (#921)

* feat: add integration tests for Proto Columns

* feat: add unit tests for Proto Columns

* feat: update tests to add column_info argument at end

* feat: remove deepcopy during deserialization of proto message

* feat: tests refactoring

* feat: integration tests refactoring

* feat: samples and sample tests refactoring

* feat: lint tests folder

* feat:lint samples directory

* feat: stop running emulator with proto ddl commands

* feat: close the file after reading

* feat: update protobuf version lower bound to >3.20 to check proto message compatibility

* feat: update setup for snippets_tests.py file

* feat: add integration tests

* feat: remove duplicate integration tests

* feat: add proto_descriptor parameter to required tests

* feat: add compatibility tests between Proto message, Bytes and Proto Enum, Int64

* feat: add index tests for proto columns

* feat: replace duplicates with sample data

* feat: update protobuf lower bound version in setup.py file to add support for proto messages and enum

* feat: lint fixes

* feat: lint fix

* feat: tests refactoring

* feat: change comment from dml to dql for read

* feat: tests refactoring for update db operation

* feat: rever autogenerated code

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix: fix code

* fix: fix code

* fix(spanner): fix code

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix(spanner): skip emulator due to b/338557401

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix(spanner): remove samples

* fix(spanner): update coverage

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* chore(spanner): update coverage

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix(spanner): add samples and update proto schema

* fix(spanner): update samples database and emulator DDL

* fix(spanner): update admin test to use autogenerated interfaces

* fix(spanner): comment refactoring

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
harshachinta and gcf-owl-bot[bot] authored May 16, 2024
1 parent bc71fe9 commit 3ca2689
Show file tree
Hide file tree
Showing 32 changed files with 1,223 additions and 71 deletions.
49 changes: 45 additions & 4 deletions google/cloud/spanner_v1/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
import decimal
import math
import time
import base64

from google.protobuf.struct_pb2 import ListValue
from google.protobuf.struct_pb2 import Value
from google.protobuf.message import Message
from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper

from google.api_core import datetime_helpers
from google.cloud._helpers import _date_from_iso8601_date
Expand Down Expand Up @@ -204,6 +207,12 @@ def _make_value_pb(value):
return Value(null_value="NULL_VALUE")
else:
return Value(string_value=value)
if isinstance(value, Message):
value = value.SerializeToString()
if value is None:
return Value(null_value="NULL_VALUE")
else:
return Value(string_value=base64.b64encode(value))

raise ValueError("Unknown type: %s" % (value,))

Expand Down Expand Up @@ -232,7 +241,7 @@ def _make_list_value_pbs(values):
return [_make_list_value_pb(row) for row in values]


def _parse_value_pb(value_pb, field_type):
def _parse_value_pb(value_pb, field_type, field_name, column_info=None):
"""Convert a Value protobuf to cell data.
:type value_pb: :class:`~google.protobuf.struct_pb2.Value`
Expand All @@ -241,6 +250,18 @@ def _parse_value_pb(value_pb, field_type):
:type field_type: :class:`~google.cloud.spanner_v1.types.Type`
:param field_type: type code for the value
:type field_name: str
:param field_name: column name
:type column_info: dict
:param column_info: (Optional) dict of column name and column information.
An object where column names as keys and custom objects as corresponding
values for deserialization. It's specifically useful for data types like
protobuf where deserialization logic is on user-specific code. When provided,
the custom object enables deserialization of backend-received column data.
If not provided, data remains serialized as bytes for Proto Messages and
integer for Proto Enums.
:rtype: varies on field_type
:returns: value extracted from value_pb
:raises ValueError: if unknown type is passed
Expand Down Expand Up @@ -273,18 +294,38 @@ def _parse_value_pb(value_pb, field_type):
return DatetimeWithNanoseconds.from_rfc3339(value_pb.string_value)
elif type_code == TypeCode.ARRAY:
return [
_parse_value_pb(item_pb, field_type.array_element_type)
_parse_value_pb(
item_pb, field_type.array_element_type, field_name, column_info
)
for item_pb in value_pb.list_value.values
]
elif type_code == TypeCode.STRUCT:
return [
_parse_value_pb(item_pb, field_type.struct_type.fields[i].type_)
_parse_value_pb(
item_pb, field_type.struct_type.fields[i].type_, field_name, column_info
)
for (i, item_pb) in enumerate(value_pb.list_value.values)
]
elif type_code == TypeCode.NUMERIC:
return decimal.Decimal(value_pb.string_value)
elif type_code == TypeCode.JSON:
return JsonObject.from_str(value_pb.string_value)
elif type_code == TypeCode.PROTO:
bytes_value = base64.b64decode(value_pb.string_value)
if column_info is not None and column_info.get(field_name) is not None:
default_proto_message = column_info.get(field_name)
if isinstance(default_proto_message, Message):
proto_message = type(default_proto_message)()
proto_message.ParseFromString(bytes_value)
return proto_message
return bytes_value
elif type_code == TypeCode.ENUM:
int_value = int(value_pb.string_value)
if column_info is not None and column_info.get(field_name) is not None:
proto_enum = column_info.get(field_name)
if isinstance(proto_enum, EnumTypeWrapper):
return proto_enum.Name(int_value)
return int_value
else:
raise ValueError("Unknown type: %s" % (field_type,))

Expand All @@ -305,7 +346,7 @@ def _parse_list_value_pbs(rows, row_type):
for row in rows:
row_data = []
for value_pb, field in zip(row.values, row_type.fields):
row_data.append(_parse_value_pb(value_pb, field.type_))
row_data.append(_parse_value_pb(value_pb, field.type_, field.name))
result.append(row_data)
return result

Expand Down
110 changes: 110 additions & 0 deletions google/cloud/spanner_v1/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
"""Custom data types for spanner."""

import json
import types

from google.protobuf.message import Message
from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper


class JsonObject(dict):
Expand Down Expand Up @@ -71,3 +75,109 @@ def serialize(self):
return json.dumps(self._array_value, sort_keys=True, separators=(",", ":"))

return json.dumps(self, sort_keys=True, separators=(",", ":"))


def _proto_message(bytes_val, proto_message_object):
"""Helper for :func:`get_proto_message`.
parses serialized protocol buffer bytes data into proto message.
Args:
bytes_val (bytes): bytes object.
proto_message_object (Message): Message object for parsing
Returns:
Message: parses serialized protocol buffer data into this message.
Raises:
ValueError: if the input proto_message_object is not of type Message
"""
if isinstance(bytes_val, types.NoneType):
return None

if not isinstance(bytes_val, bytes):
raise ValueError("Expected input bytes_val to be a string")

proto_message = proto_message_object.__deepcopy__()
proto_message.ParseFromString(bytes_val)
return proto_message


def _proto_enum(int_val, proto_enum_object):
"""Helper for :func:`get_proto_enum`.
parses int value into string containing the name of an enum value.
Args:
int_val (int): integer value.
proto_enum_object (EnumTypeWrapper): Enum object.
Returns:
str: string containing the name of an enum value.
Raises:
ValueError: if the input proto_enum_object is not of type EnumTypeWrapper
"""
if isinstance(int_val, types.NoneType):
return None

if not isinstance(int_val, int):
raise ValueError("Expected input int_val to be a integer")

return proto_enum_object.Name(int_val)


def get_proto_message(bytes_string, proto_message_object):
"""parses serialized protocol buffer bytes' data or its list into proto message or list of proto message.
Args:
bytes_string (bytes or list[bytes]): bytes object.
proto_message_object (Message): Message object for parsing
Returns:
Message or list[Message]: parses serialized protocol buffer data into this message.
Raises:
ValueError: if the input proto_message_object is not of type Message
"""
if isinstance(bytes_string, types.NoneType):
return None

if not isinstance(proto_message_object, Message):
raise ValueError("Input proto_message_object should be of type Message")

if not isinstance(bytes_string, (bytes, list)):
raise ValueError(
"Expected input bytes_string to be a string or list of strings"
)

if isinstance(bytes_string, list):
return [_proto_message(item, proto_message_object) for item in bytes_string]

return _proto_message(bytes_string, proto_message_object)


def get_proto_enum(int_value, proto_enum_object):
"""parses int or list of int values into enum or list of enum values.
Args:
int_value (int or list[int]): list of integer value.
proto_enum_object (EnumTypeWrapper): Enum object.
Returns:
str or list[str]: list of strings containing the name of enum value.
Raises:
ValueError: if the input int_list is not of type list
"""
if isinstance(int_value, types.NoneType):
return None

if not isinstance(proto_enum_object, EnumTypeWrapper):
raise ValueError("Input proto_enum_object should be of type EnumTypeWrapper")

if not isinstance(int_value, (int, list)):
raise ValueError("Expected input int_value to be a integer or list of integers")

if isinstance(int_value, list):
return [_proto_enum(item, proto_enum_object) for item in int_value]

return _proto_enum(int_value, proto_enum_object)
20 changes: 19 additions & 1 deletion google/cloud/spanner_v1/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ class Database(object):
:type enable_drop_protection: boolean
:param enable_drop_protection: (Optional) Represents whether the database
has drop protection enabled or not.
:type proto_descriptors: bytes
:param proto_descriptors: (Optional) Proto descriptors used by CREATE/ALTER PROTO BUNDLE
statements in 'ddl_statements' above.
"""

_spanner_api = None
Expand All @@ -152,6 +155,7 @@ def __init__(
database_dialect=DatabaseDialect.DATABASE_DIALECT_UNSPECIFIED,
database_role=None,
enable_drop_protection=False,
proto_descriptors=None,
):
self.database_id = database_id
self._instance = instance
Expand All @@ -173,6 +177,7 @@ def __init__(
self._enable_drop_protection = enable_drop_protection
self._reconciling = False
self._directed_read_options = self._instance._client.directed_read_options
self._proto_descriptors = proto_descriptors

if pool is None:
pool = BurstyPool(database_role=database_role)
Expand Down Expand Up @@ -382,6 +387,14 @@ def enable_drop_protection(self):
def enable_drop_protection(self, value):
self._enable_drop_protection = value

@property
def proto_descriptors(self):
"""Proto Descriptors for this database.
:rtype: bytes
:returns: bytes representing the proto descriptors for this database
"""
return self._proto_descriptors

@property
def logger(self):
"""Logger used by the database.
Expand Down Expand Up @@ -465,6 +478,7 @@ def create(self):
extra_statements=list(self._ddl_statements),
encryption_config=self._encryption_config,
database_dialect=self._database_dialect,
proto_descriptors=self._proto_descriptors,
)
future = api.create_database(request=request, metadata=metadata)
return future
Expand Down Expand Up @@ -501,6 +515,7 @@ def reload(self):
metadata = _metadata_with_prefix(self.name)
response = api.get_database_ddl(database=self.name, metadata=metadata)
self._ddl_statements = tuple(response.statements)
self._proto_descriptors = response.proto_descriptors
response = api.get_database(name=self.name, metadata=metadata)
self._state = DatabasePB.State(response.state)
self._create_time = response.create_time
Expand All @@ -514,7 +529,7 @@ def reload(self):
self._enable_drop_protection = response.enable_drop_protection
self._reconciling = response.reconciling

def update_ddl(self, ddl_statements, operation_id=""):
def update_ddl(self, ddl_statements, operation_id="", proto_descriptors=None):
"""Update DDL for this database.
Apply any configured schema from :attr:`ddl_statements`.
Expand All @@ -526,6 +541,8 @@ def update_ddl(self, ddl_statements, operation_id=""):
:param ddl_statements: a list of DDL statements to use on this database
:type operation_id: str
:param operation_id: (optional) a string ID for the long-running operation
:type proto_descriptors: bytes
:param proto_descriptors: (optional) Proto descriptors used by CREATE/ALTER PROTO BUNDLE statements
:rtype: :class:`google.api_core.operation.Operation`
:returns: an operation instance
Expand All @@ -539,6 +556,7 @@ def update_ddl(self, ddl_statements, operation_id=""):
database=self.name,
statements=ddl_statements,
operation_id=operation_id,
proto_descriptors=proto_descriptors,
)

future = api.update_database_ddl(request=request, metadata=metadata)
Expand Down
7 changes: 7 additions & 0 deletions google/cloud/spanner_v1/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ def database(
enable_drop_protection=False,
# should be only set for tests if tests want to use interceptors
enable_interceptors_in_tests=False,
proto_descriptors=None,
):
"""Factory to create a database within this instance.
Expand Down Expand Up @@ -478,9 +479,14 @@ def database(
:param enable_interceptors_in_tests: (Optional) should only be set to True
for tests if the tests want to use interceptors.
:type proto_descriptors: bytes
:param proto_descriptors: (Optional) Proto descriptors used by CREATE/ALTER PROTO BUNDLE
statements in 'ddl_statements' above.
:rtype: :class:`~google.cloud.spanner_v1.database.Database`
:returns: a database owned by this instance.
"""

if not enable_interceptors_in_tests:
return Database(
database_id,
Expand All @@ -492,6 +498,7 @@ def database(
database_dialect=database_dialect,
database_role=database_role,
enable_drop_protection=enable_drop_protection,
proto_descriptors=proto_descriptors,
)
else:
return TestDatabase(
Expand Down
34 changes: 34 additions & 0 deletions google/cloud/spanner_v1/param_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from google.cloud.spanner_v1 import TypeAnnotationCode
from google.cloud.spanner_v1 import TypeCode
from google.cloud.spanner_v1 import StructType
from google.protobuf.message import Message
from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper


# Scalar parameter types
Expand Down Expand Up @@ -73,3 +75,35 @@ def Struct(fields):
:returns: the appropriate struct-type protobuf
"""
return Type(code=TypeCode.STRUCT, struct_type=StructType(fields=fields))


def ProtoMessage(proto_message_object):
"""Construct a proto message type description protobuf.
:type proto_message_object: :class:`google.protobuf.message.Message`
:param proto_message_object: the proto message instance
:rtype: :class:`type_pb2.Type`
:returns: the appropriate proto-message-type protobuf
"""
if not isinstance(proto_message_object, Message):
raise ValueError("Expected input object of type Proto Message.")
return Type(
code=TypeCode.PROTO, proto_type_fqn=proto_message_object.DESCRIPTOR.full_name
)


def ProtoEnum(proto_enum_object):
"""Construct a proto enum type description protobuf.
:type proto_enum_object: :class:`google.protobuf.internal.enum_type_wrapper.EnumTypeWrapper`
:param proto_enum_object: the proto enum instance
:rtype: :class:`type_pb2.Type`
:returns: the appropriate proto-enum-type protobuf
"""
if not isinstance(proto_enum_object, EnumTypeWrapper):
raise ValueError("Expected input object of type Proto Enum")
return Type(
code=TypeCode.ENUM, proto_type_fqn=proto_enum_object.DESCRIPTOR.full_name
)
Loading

0 comments on commit 3ca2689

Please sign in to comment.