Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialize default values in oneofs when calling to_dict() or to_json() #110

Merged
merged 12 commits into from
Jul 25, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions betterproto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,11 @@ def __bytes__(self) -> bytes:
serialize_empty = True

if value == self._get_field_default(field_name) and not (
selected_in_group or serialize_empty
selected_in_group
or serialize_empty
or self._include_default_value_for_oneof(
field_name=field_name, meta=meta
)
):
# Default (zero) values are not serialized. Two exceptions are
# if this is the selected oneof item or if we know we have to
Expand Down Expand Up @@ -719,6 +723,13 @@ def _postprocess_single(

return value

def _include_default_value_for_oneof(
self, field_name: str, meta: FieldMetadata
) -> bool:
return (
meta.group is not None and self._group_current.get(meta.group) == field_name
)

def parse(self: T, data: bytes) -> T:
"""
Parse the binary encoded Protobuf into this message instance. This
Expand Down Expand Up @@ -791,10 +802,22 @@ def to_dict(
cased_name = casing(field_name).rstrip("_") # type: ignore
if meta.proto_type == "message":
if isinstance(v, datetime):
if v != DATETIME_ZERO or include_default_values:
if (
v != DATETIME_ZERO
or include_default_values
or self._include_default_value_for_oneof(
field_name=field_name, meta=meta
)
):
output[cased_name] = _Timestamp.timestamp_to_json(v)
elif isinstance(v, timedelta):
if v != timedelta(0) or include_default_values:
if (
v != timedelta(0)
or include_default_values
or self._include_default_value_for_oneof(
field_name=field_name, meta=meta
)
):
output[cased_name] = _Duration.delta_to_json(v)
elif meta.wraps:
if v is not None or include_default_values:
Expand All @@ -804,17 +827,28 @@ def to_dict(
v = [i.to_dict(casing, include_default_values) for i in v]
if v or include_default_values:
output[cased_name] = v
else:
if v._serialized_on_wire or include_default_values:
output[cased_name] = v.to_dict(casing, include_default_values)
elif (
v._serialized_on_wire
or include_default_values
or self._include_default_value_for_oneof(
field_name=field_name, meta=meta
)
):
output[cased_name] = v.to_dict(casing, include_default_values,)
elif meta.proto_type == "map":
for k in v:
if hasattr(v[k], "to_dict"):
v[k] = v[k].to_dict(casing, include_default_values)

if v or include_default_values:
output[cased_name] = v
elif v != self._get_field_default(field_name) or include_default_values:
elif (
v != self._get_field_default(field_name)
or include_default_values
or self._include_default_value_for_oneof(
field_name=field_name, meta=meta
)
):
if meta.proto_type in INT_64_TYPES:
if isinstance(v, list):
output[cased_name] = [str(n) for n in v]
Expand Down Expand Up @@ -866,7 +900,9 @@ def from_dict(self: T, value: dict) -> T:
elif meta.wraps:
setattr(self, field_name, value[key])
else:
v.from_dict(value[key])
v = v.from_dict(value[key])
if v is not None:
setattr(self, field_name, v)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused how from_dict ever worked properly, if message fields were not assigned to their parent before..

any idea of why this was needed, or why from_dict did actually work?

Also, lines 930-931 assign v to the field. Perhaps just assigning v= would suffice?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I realized it!

v is an existing Message object. The required behavior is that the data from the map is merged into the existing fields. I think no assignment is needed here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah correct, the v = v.from_dict(value[key]) isn't needed, I'm just used to transforming objects vs mutating them that I wrote it like that! Also, it looks like I can just move lines 930-931 out an indentation layer and achieve the same effect 😄

elif meta.map_types and meta.map_types[1] == TYPE_MESSAGE:
v = getattr(self, field_name)
cls = self._betterproto.cls_by_field[field_name + ".value"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
syntax = "proto3";

import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";

message Message{
int64 value = 1;
}

message NestedMessage{
int64 id = 1;
oneof value_type{
Message wrapped_message_value = 2;
}
}

message Test{
oneof value_type {
bool bool_value = 1;
int64 int64_value = 2;
google.protobuf.Timestamp timestamp_value = 3;
google.protobuf.Duration duration_value = 4;
Message wrapped_message_value = 5;
NestedMessage wrapped_nested_message_value = 6;
google.protobuf.BoolValue wrapped_bool_value = 7;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pytest
import datetime

import betterproto
from betterproto.tests.output_betterproto.oneof_default_value_serialization import (
Test,
Message,
NestedMessage,
)


def assert_round_trip_serialization_works(message: Test) -> None:
assert betterproto.which_one_of(message, "value_type") == betterproto.which_one_of(
Test().from_json(message.to_json()), "value_type"
)


def test_oneof_default_value_serialization_works_for_all_values():
"""
Serialization from message with oneof set to default -> JSON -> message should keep
default value field intact.
"""

test_cases = [
Test(bool_value=False),
Test(int64_value=0),
Test(
timestamp_value=datetime.datetime(
year=1970,
month=1,
day=1,
hour=0,
minute=0,
tzinfo=datetime.timezone.utc,
)
),
Test(duration_value=datetime.timedelta(0)),
Test(wrapped_message_value=Message(value=0)),
# NOTE: Do NOT use betterproto.BoolValue here, it will cause JSON serialization
# errors.
# TODO: Do we want to allow use of BoolValue directly within a wrapped field or
# should we simply hard fail here?
Test(wrapped_bool_value=False),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point!

It seems odd that passing a BoolValue to a field that is defined as such will break the library.
Of course, it was meant as a convenience that the BoolValue is not required, but breaking it goes a bit far.

It does raise some questions.
Do we always store a bool internally? Then what happens if you store a BoolValue, then read it again.. it becomes a bool. That may be confusing, and potentially cause bugs.

We may need to look into this, in a separate issue.

Its fine with me to keep the TODO

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah if I had to make a choice (with just thinking about this briefly) I would err on the side of failing fast when attempting to assign a non-primitive to a wrapped field. That at least makes the usage of the field unambiguous

]
for message in test_cases:
assert_round_trip_serialization_works(message)


def test_oneof_no_default_values_passed():
message = Test()
assert (
betterproto.which_one_of(message, "value_type")
== betterproto.which_one_of(Test().from_json(message.to_json()), "value_type")
== ("", None)
)


def test_oneof_nested_oneof_messages_are_serialized_with_defaults():
"""
Nested messages with oneofs should also be handled
"""
message = Test(
wrapped_nested_message_value=NestedMessage(
id=0, wrapped_message_value=Message(value=0)
)
)
assert (
betterproto.which_one_of(message, "value_type")
== betterproto.which_one_of(Test().from_json(message.to_json()), "value_type")
== (
"wrapped_nested_message_value",
NestedMessage(id=0, wrapped_message_value=Message(value=0)),
)
)
16 changes: 9 additions & 7 deletions betterproto/tests/inputs/oneof_enum/test_oneof_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,36 @@
from betterproto.tests.util import get_test_case_json_data


@pytest.mark.xfail
def test_which_one_of_returns_enum_with_default_value():
"""
returns first field when it is enum and set with default value
"""
message = Test()
message.from_json(get_test_case_json_data("oneof_enum", "oneof_enum-enum-0.json"))
assert message.move is None

assert message.move == Move(
x=0, y=0
) # Proto3 will default this as there is no null
assert message.signal == Signal.PASS
assert betterproto.which_one_of(message, "action") == ("signal", Signal.PASS)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change needed to solve the bug? As I understand the bug is in to_dict and not from_dict..

I think the idea that proto doesn't have null values might actually be somewhat muddled, and incorrect. Because values can be skipped when they are sent over the wire, effectively making them undefined. However, the protobuf libraries will then simply return the default value for those skipped fields. That behavior is indeed part of protobuf specification, but for oneof groups, I consider it not part of the spec that all other fields should have a default value as well (unless its actually wrtten in the spec, sorry I didn't have time to read it 😅 ). At least it doesn't make much sense to me why it would be needed.

Unless it is somehow needed for this bugfix, I would suggest to exclude that part from this PR, because its a functional change (and not sure if its an improvement). And that is a bit harder to judge than the bugfix, which we could merge quickly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is actually default behavior in Google's protobuf implementation as well. What's actually happening here is that the move field is never itself null but it isn't actually sent over the wire - calling to_dict() should only show signal there. I think what's confusing is that a oneof field can have nothing set, an attribute set to a default value, OR an attrbiute set to a non-default value. But, on the actual message object, the attributes themselves are never actually null, they're always in a default state. I replicated this behaviour using these two messages:

message Point{
    int64 x = 1;
    int64 y = 2;
}

message Test{
    oneof test_value {
        Point point = 1;
        google.protobuf.BoolValue bool_value = 2;
        int64 int_value = 3;
    }
}

Compiling with the Google proto impl I then checked the following:

In [4]: t = Test()

In [5]: t
Out[5]:

In [6]: t.point
Out[6]:

In [7]: t.point.x
Out[7]: 0
...

In [9]: type(t.point.x)
Out[9]: int

In [10]: type(t.point)
Out[10]: foo.Point

In [11]: type(t.int_value)
Out[11]: int

In [12]: t.bool_value
Out[12]:

In [13]: type(t.bool_value)
Out[13]: google.protobuf.wrappers_pb2.BoolValue

In [14]: from google.protobuf.json_format import MessageToDict

In [15]: MessageToDict(t)
Out[15]: {}

In [16]: t.point.x = 1

In [17]: MessageToDict(t)
Out[17]: {'point': {'x': '1'}}

In [18]: t.int_value = 1

In [19]: MessageToDict(t)
Out[19]: {'intValue': '1'}

In [20]: t.bool_value.value = True

In [21]: t
Out[21]:
bool_value {
  value: true
}

In [22]: MessageToDict(t)
Out[22]: {'boolValue': True}

I can either add an assert to this test case or a separate test case demonstrating that betterprotos has similar behaviour


@pytest.mark.xfail
def test_which_one_of_returns_enum_with_non_default_value():
"""
returns first field when it is enum and set with non default value
"""
message = Test()
message.from_json(get_test_case_json_data("oneof_enum", "oneof_enum-enum-1.json"))
assert message.move is None
assert message.signal == Signal.PASS
assert message.move == Move(
x=0, y=0
) # Proto3 will default this as there is no null
assert message.signal == Signal.RESIGN
assert betterproto.which_one_of(message, "action") == ("signal", Signal.RESIGN)


@pytest.mark.xfail
def test_which_one_of_returns_second_field_when_set():
message = Test()
message.from_json(get_test_case_json_data("oneof_enum"))
assert message.move == Move(x=2, y=3)
assert message.signal == 0
assert message.signal == Signal.PASS
assert betterproto.which_one_of(message, "action") == ("move", Move(x=2, y=3))
11 changes: 11 additions & 0 deletions betterproto/tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,14 @@ class TestParentMessage(betterproto.Message):
"someDouble": 1.2,
"someMessage": {"someOtherInt": 0},
}


def test_oneof_default_value_set_causes_writes_wire():
@dataclass
class Foo(betterproto.Message):
bar: int = betterproto.int32_field(1, group="group1")
baz: str = betterproto.string_field(2, group="group1")

assert bytes(Foo(bar=0)) != b""
assert bytes(Foo(baz="")) == b"" # Baz is just an empty string
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean its not possible to detect which_one_of if one of the groups is a String and the value was the empty string?

Same with other default values? 😕

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - this actually works with the other default values but not for a string at the moment, I'll get a fix in 😄

assert bytes(Foo()) == b""