From d108cc8e45d1bdbe35c5e226035f792391c5f442 Mon Sep 17 00:00:00 2001 From: Joshua Humphries <2035234+jhump@users.noreply.github.com> Date: Wed, 6 Dec 2023 13:16:58 -0500 Subject: [PATCH] Preserve interpreted options when input to compiler is already-built FileDescriptorProto (#217) When a resolver provides an input to the compiler in the form of a `*descriptorpb.FileDescriptorProto` (instead of as source code, or as a fully-baked `protoreflect.FileDescriptor`), the step that interprets options could inadvertently clobber field options (thinking it was the case that the options were empty, just because there were no uninterpreted options to process). Also, the `linker.Result.CanonicalProto()` method would also clobber options, even if it had no canonical serialized bytes to replace them. This commit fixes both of these issues. --- linker/descriptors.go | 37 ++++++++++++----------- linker/linker.go | 13 ++++++-- options/options.go | 11 +++++-- options/options_test.go | 67 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 22 deletions(-) diff --git a/linker/descriptors.go b/linker/descriptors.go index 2e6fea66..d4f82fbd 100644 --- a/linker/descriptors.go +++ b/linker/descriptors.go @@ -325,10 +325,19 @@ func (r *result) CanonicalProto() *descriptorpb.FileDescriptorProto { return fd } +func (r *result) storeOptionBytes(opts, origOpts proto.Message) { + optionBytes := r.optionBytes[origOpts] + if len(optionBytes) == 0 { + // If we don't know about this options message, leave it alone. + return + } + proto.Reset(opts) + opts.ProtoReflect().SetUnknown(optionBytes) +} + func (r *result) storeOptionBytesInFile(fd, origFd *descriptorpb.FileDescriptorProto) { if fd.Options != nil { - fd.Options.Reset() - fd.Options.ProtoReflect().SetUnknown(r.optionBytes[origFd.Options]) + r.storeOptionBytes(fd.Options, origFd.Options) } for i, md := range fd.MessageType { @@ -349,15 +358,13 @@ func (r *result) storeOptionBytesInFile(fd, origFd *descriptorpb.FileDescriptorP for i, sd := range fd.Service { origSd := origFd.Service[i] if sd.Options != nil { - sd.Options.Reset() - sd.Options.ProtoReflect().SetUnknown(r.optionBytes[origSd.Options]) + r.storeOptionBytes(sd.Options, origSd.Options) } for j, mtd := range sd.Method { origMtd := origSd.Method[j] if mtd.Options != nil { - mtd.Options.Reset() - mtd.Options.ProtoReflect().SetUnknown(r.optionBytes[origMtd.Options]) + r.storeOptionBytes(mtd.Options, origMtd.Options) } } } @@ -372,8 +379,7 @@ func (r *result) storeOptionBytesInMessage(md, origMd *descriptorpb.DescriptorPr } if md.Options != nil { - md.Options.Reset() - md.Options.ProtoReflect().SetUnknown(r.optionBytes[origMd.Options]) + r.storeOptionBytes(md.Options, origMd.Options) } for i, fld := range md.Field { @@ -384,16 +390,14 @@ func (r *result) storeOptionBytesInMessage(md, origMd *descriptorpb.DescriptorPr for i, ood := range md.OneofDecl { origOod := origMd.OneofDecl[i] if ood.Options != nil { - ood.Options.Reset() - ood.Options.ProtoReflect().SetUnknown(r.optionBytes[origOod.Options]) + r.storeOptionBytes(ood.Options, origOod.Options) } } for i, exr := range md.ExtensionRange { origExr := origMd.ExtensionRange[i] if exr.Options != nil { - exr.Options.Reset() - exr.Options.ProtoReflect().SetUnknown(r.optionBytes[origExr.Options]) + r.storeOptionBytes(exr.Options, origExr.Options) } } @@ -415,23 +419,20 @@ func (r *result) storeOptionBytesInMessage(md, origMd *descriptorpb.DescriptorPr func (r *result) storeOptionBytesInEnum(ed, origEd *descriptorpb.EnumDescriptorProto) { if ed.Options != nil { - ed.Options.Reset() - ed.Options.ProtoReflect().SetUnknown(r.optionBytes[origEd.Options]) + r.storeOptionBytes(ed.Options, origEd.Options) } for i, evd := range ed.Value { origEvd := origEd.Value[i] if evd.Options != nil { - evd.Options.Reset() - evd.Options.ProtoReflect().SetUnknown(r.optionBytes[origEvd.Options]) + r.storeOptionBytes(evd.Options, origEvd.Options) } } } func (r *result) storeOptionBytesInField(fld, origFld *descriptorpb.FieldDescriptorProto) { if fld.Options != nil { - fld.Options.Reset() - fld.Options.ProtoReflect().SetUnknown(r.optionBytes[origFld.Options]) + r.storeOptionBytes(fld.Options, origFld.Options) } } diff --git a/linker/linker.go b/linker/linker.go index 6a2cfd5d..cb70e84e 100644 --- a/linker/linker.go +++ b/linker/linker.go @@ -132,8 +132,8 @@ type Result interface { // will be serialized in a canonical way. The "canonical" way matches // the way that "protoc" emits option values, which is a way that // mostly matches the way options are defined in source, including - // ordering and de-structuring. Unlike the FileDescriptorProto() method, this - // method is more expensive and results in a new descriptor proto + // ordering and de-structuring. Unlike the FileDescriptorProto() method, + // this method is more expensive and results in a new descriptor proto // being constructed with each call. // // The returned value will have all options (fields of the various @@ -141,6 +141,15 @@ type Result interface { // fields. So the returned value will serialize as desired, but it // is otherwise not useful since all option values are treated as // unknown. + // + // Note that CanonicalProto is a no-op if the options in this file + // were not interpreted by this module (e.g. the underlying descriptor + // proto was provided, with options already interpreted, instead of + // parsed from source). If the underlying descriptor proto was provided, + // but with a mix of interpreted and uninterpreted options, this method + // will effectively clear the already-interpreted fields and only the + // options actually interpreted by the compile operation will be + // retained. CanonicalProto() *descriptorpb.FileDescriptorProto // RemoveAST drops the AST information from this result. diff --git a/options/options.go b/options/options.go index b2915815..222fe8e1 100644 --- a/options/options.go +++ b/options/options.go @@ -326,8 +326,11 @@ func (interp *interpreter) interpretMessageOptions(fqn string, md *descriptorpb. return nil } +var emptyFieldOptions = &descriptorpb.FieldOptions{} + func (interp *interpreter) interpretFieldOptions(fqn string, fld *descriptorpb.FieldDescriptorProto) error { opts := fld.GetOptions() + emptyOptionsAlreadyPresent := opts != nil && len(opts.GetUninterpretedOption()) == 0 // First process pseudo-options if len(opts.GetUninterpretedOption()) > 0 { @@ -338,8 +341,12 @@ func (interp *interpreter) interpretFieldOptions(fqn string, fld *descriptorpb.F // Must re-check length of uninterpreted options since above step could remove some. if len(opts.GetUninterpretedOption()) == 0 { - // If no options to interpret, clear out options. - fld.Options = nil + // If the message has no other interpreted options, we clear it out. But don't + // do that if the descriptor came in with empty options or if it already has + // interpreted option fields. + if opts != nil && !emptyOptionsAlreadyPresent && proto.Equal(fld.Options, emptyFieldOptions) { + fld.Options = nil + } return nil } diff --git a/options/options_test.go b/options/options_test.go index 4bb280f7..f210771b 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -29,7 +29,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protodesc" "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/reflect/protoregistry" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/descriptorpb" @@ -472,3 +474,68 @@ func TestInterpretOptionsWithoutAST(t *testing.T) { require.Empty(t, diff) } } + +//nolint:errcheck +func TestInterpretOptionsWithoutASTNoOp(t *testing.T) { + t.Parallel() + // Similar to above test, where we have descriptor protos and no AST. But this + // time, interpreting options is a no-op because they all have options already. + + // First compile from source, so we interpret options with an AST + fileNames := []string{"desc_test_options.proto", "desc_test_comments.proto", "desc_test_complex.proto"} + compiler := &protocompile.Compiler{ + Resolver: protocompile.WithStandardImports(&protocompile.SourceResolver{ + ImportPaths: []string{"../internal/testdata"}, + }), + } + files, err := compiler.Compile(context.Background(), fileNames...) + require.NoError(t, err) + + // Now compile with just the protos, with options already interpreted, to make + // sure it's a no-op and that we don't mangle any already-interpreted options. + compiler = &protocompile.Compiler{ + Resolver: protocompile.WithStandardImports(protocompile.ResolverFunc( + func(name string) (protocompile.SearchResult, error) { + var res protocompile.SearchResult + fd := files.FindFileByPath(name) + if fd == nil { + file, err := protoregistry.GlobalFiles.FindFileByPath(name) + if err != nil { + return res, err + } + res.Proto = protodesc.ToFileDescriptorProto(file) + } else { + res.Proto = fd.(linker.Result).FileDescriptorProto() + } + res.Proto = proto.Clone(res.Proto).(*descriptorpb.FileDescriptorProto) + return res, nil + }, + )), + } + filesFromNoAST, err := compiler.Compile(context.Background(), fileNames...) + require.NoError(t, err) + + for _, file := range files { + fromNoAST := filesFromNoAST.FindFileByPath(file.Path()) + require.NotNil(t, fromNoAST) + fd := file.(linker.Result).FileDescriptorProto() + fdFromNoAST := fromNoAST.(linker.Result).FileDescriptorProto() + // final protos, with options interpreted, match + diff := cmp.Diff(fd, fdFromNoAST, protocmp.Transform()) + require.Empty(t, diff) + } + + // Also make sure the canonical bytes are correct + for _, file := range filesFromNoAST { + res := file.(linker.Result) + canonicalFd := res.CanonicalProto() + data, err := proto.Marshal(canonicalFd) + require.NoError(t, err) + fromCanonical := &descriptorpb.FileDescriptorProto{} + err = proto.UnmarshalOptions{Resolver: linker.ResolverFromFile(file)}.Unmarshal(data, fromCanonical) + require.NoError(t, err) + origFd := res.FileDescriptorProto() + diff := cmp.Diff(origFd, fromCanonical, protocmp.Transform()) + require.Empty(t, diff) + } +}