From 2a134256f109eedeccf57979fffcbe2c34fdfb67 Mon Sep 17 00:00:00 2001 From: methylDragon Date: Tue, 28 Jan 2025 15:37:59 -0800 Subject: [PATCH] BUGFIX: Add support for nested definitions in Python mcap-protobuf-support (#1321) ### Changelog Bugfix: Support nested message definitions in mcap-protobuf-support ### Description Consider the following Protobuf definition: ``` message NestedTypeMessage { message NestedType { string data = 1; } NestedType nested = 1; } ``` Currently the decoder's use of the `protobuf.GetMessageClassesForFiles()` misses out the inner `NestedType` definition, causing decoding failures from missing nested descriptors, even though those descriptors would have been populated as part of the encoded file descriptors in an MCAP file. The error that is raised is something like: `FileDescriptorSet for type {schema.name} is missing that schema` This issue persists to an arbitrary level of nesting. This PR fixes that issue (including for deeply nested definitions) and adds a unit test to prevent regression. ### PS: How soon after merging this will it make it to PyPI? Signed-off-by: methylDragon --- .../mcap_protobuf/decoder.py | 22 ++++++++++++- .../mcap-protobuf-support/tests/generate.py | 20 ++++++++++++ .../test_proto/nested_type_message.proto | 21 +++++++++++++ .../tests/test_decoder.py | 7 ++++- .../test_proto/nested_type_message_pb2.py | 31 +++++++++++++++++++ 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 python/mcap-protobuf-support/tests/proto/test_proto/nested_type_message.proto create mode 100644 python/mcap-protobuf-support/tests/test_proto/nested_type_message_pb2.py diff --git a/python/mcap-protobuf-support/mcap_protobuf/decoder.py b/python/mcap-protobuf-support/mcap_protobuf/decoder.py index 28716c74be..cfe8ffb2c7 100644 --- a/python/mcap-protobuf-support/mcap_protobuf/decoder.py +++ b/python/mcap-protobuf-support/mcap_protobuf/decoder.py @@ -4,7 +4,7 @@ from google.protobuf.descriptor_pb2 import FileDescriptorProto, FileDescriptorSet from google.protobuf.descriptor_pool import DescriptorPool -from google.protobuf.message_factory import GetMessageClassesForFiles +from google.protobuf.message_factory import GetMessageClassesForFiles, GetMessageClass from mcap.decoder import DecoderFactory as McapDecoderFactory from mcap.exceptions import McapError @@ -59,6 +59,26 @@ def _add(fd: FileDescriptorProto): messages = GetMessageClassesForFiles([fd.name for fd in fds.file], pool) + # GetMessageClassesForFiles only fetches top-level message definitions in the file. + # + # We must traverse the top-level message definitions to recursively find and append any + # nested message definitions. + nested_messages = {} + for message_class in messages.values(): + nested_descriptions = list( + message_class.DESCRIPTOR.nested_types_by_name.values() + ) + while nested_descriptions: + nested_desc = nested_descriptions.pop() + nested_messages[nested_desc.full_name] = GetMessageClass( + nested_desc + ) + nested_descriptions.extend( + nested_desc.nested_types_by_name.values() + ) + + messages.update(nested_messages) + for name, klass in messages.items(): if name == schema.name: self._types[schema.id] = klass diff --git a/python/mcap-protobuf-support/tests/generate.py b/python/mcap-protobuf-support/tests/generate.py index 6d915b6051..7003700059 100644 --- a/python/mcap-protobuf-support/tests/generate.py +++ b/python/mcap-protobuf-support/tests/generate.py @@ -13,6 +13,7 @@ from test_proto.complex_message_pb2 import ComplexMessage # noqa: #402 from test_proto.intermediate_message_1_pb2 import IntermediateMessage1 # noqa: #402 from test_proto.intermediate_message_2_pb2 import IntermediateMessage2 # noqa: #402 +from test_proto.nested_type_message_pb2 import NestedTypeMessage # noqa: #402 from test_proto.simple_message_pb2 import SimpleMessage # noqa: #402 @@ -65,6 +66,7 @@ def generate_sample_data(output: IO[Any]): log_time=i * 1000, publish_time=i * 1000, ) + complex_message = ComplexMessage( intermediate1=IntermediateMessage1( simple=SimpleMessage(data=f"Field A {i}") @@ -79,4 +81,22 @@ def generate_sample_data(output: IO[Any]): log_time=i * 1000, publish_time=i * 1000, ) + + nested_type_message = NestedTypeMessage( + nested_type_message_1=NestedTypeMessage.NestedType1( + doubly_nested_type_message=NestedTypeMessage.NestedType1.DoublyNestedType( + data=f"Field C {i}" + ) + ), + nested_type_message_2=NestedTypeMessage.NestedType2( + data=f"Field D {i}" + ), + ) + writer.write_message( + topic="/nested_type_message", + message=nested_type_message, + log_time=i * 1000, + publish_time=i * 1000, + ) + output.seek(0) diff --git a/python/mcap-protobuf-support/tests/proto/test_proto/nested_type_message.proto b/python/mcap-protobuf-support/tests/proto/test_proto/nested_type_message.proto new file mode 100644 index 0000000000..54071f3e46 --- /dev/null +++ b/python/mcap-protobuf-support/tests/proto/test_proto/nested_type_message.proto @@ -0,0 +1,21 @@ +syntax = "proto3"; + + +package test_proto; + +message NestedTypeMessage { + message NestedType1 { + message DoublyNestedType { + string data = 1; + } + + DoublyNestedType doubly_nested_type_message = 1; + } + + message NestedType2 { + string data = 1; + } + + NestedType1 nested_type_message_1 = 1; + NestedType2 nested_type_message_2 = 2; +} diff --git a/python/mcap-protobuf-support/tests/test_decoder.py b/python/mcap-protobuf-support/tests/test_decoder.py index 4867a2a203..ad464f6293 100644 --- a/python/mcap-protobuf-support/tests/test_decoder.py +++ b/python/mcap-protobuf-support/tests/test_decoder.py @@ -23,12 +23,17 @@ def test_protobuf_decoder(): if channel.topic == "/complex_message": assert proto_msg.intermediate1.simple.data.startswith("Field A") assert proto_msg.intermediate2.simple.data.startswith("Field B") + elif channel.topic == "/nested_type_message": + assert proto_msg.nested_type_message_1.doubly_nested_type_message.data.startswith( + "Field C" + ) + assert proto_msg.nested_type_message_2.data.startswith("Field D") elif channel.topic == "/simple_message": assert proto_msg.data.startswith("Hello MCAP protobuf world") else: raise AssertionError(f"unrecognized topic {channel.topic}") - assert count == 20 + assert count == 30 def test_with_disordered_file_descriptors(): diff --git a/python/mcap-protobuf-support/tests/test_proto/nested_type_message_pb2.py b/python/mcap-protobuf-support/tests/test_proto/nested_type_message_pb2.py new file mode 100644 index 0000000000..0e82ea1cb8 --- /dev/null +++ b/python/mcap-protobuf-support/tests/test_proto/nested_type_message_pb2.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: test_proto/nested_type_message.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$test_proto/nested_type_message.proto\x12\ntest_proto\"\xd6\x02\n\x11NestedTypeMessage\x12H\n\x15nested_type_message_1\x18\x01 \x01(\x0b\x32).test_proto.NestedTypeMessage.NestedType1\x12H\n\x15nested_type_message_2\x18\x02 \x01(\x0b\x32).test_proto.NestedTypeMessage.NestedType2\x1a\x8f\x01\n\x0bNestedType1\x12^\n\x1a\x64oubly_nested_type_message\x18\x01 \x01(\x0b\x32:.test_proto.NestedTypeMessage.NestedType1.DoublyNestedType\x1a \n\x10\x44oublyNestedType\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t\x1a\x1b\n\x0bNestedType2\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\tb\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'test_proto.nested_type_message_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _NESTEDTYPEMESSAGE._serialized_start=53 + _NESTEDTYPEMESSAGE._serialized_end=395 + _NESTEDTYPEMESSAGE_NESTEDTYPE1._serialized_start=223 + _NESTEDTYPEMESSAGE_NESTEDTYPE1._serialized_end=366 + _NESTEDTYPEMESSAGE_NESTEDTYPE1_DOUBLYNESTEDTYPE._serialized_start=334 + _NESTEDTYPEMESSAGE_NESTEDTYPE1_DOUBLYNESTEDTYPE._serialized_end=366 + _NESTEDTYPEMESSAGE_NESTEDTYPE2._serialized_start=368 + _NESTEDTYPEMESSAGE_NESTEDTYPE2._serialized_end=395 +# @@protoc_insertion_point(module_scope)