From 3649f873a3ff7c9d01653dfd67031ab2f9548df5 Mon Sep 17 00:00:00 2001 From: zhangskz Date: Mon, 16 Dec 2024 13:33:11 -0500 Subject: [PATCH] Remove unused / invalid C++ lazy repeated field code from OSS. (#19682) Fixes https://github.com/protocolbuffers/protobuf/issues/19671 PiperOrigin-RevId: 706729524 --- src/google/protobuf/lazy_repeated_field.cc | 343 ----- src/google/protobuf/lazy_repeated_field.h | 1123 ----------------- .../protobuf/lazy_repeated_field_heavy.cc | 401 ------ src/google/protobuf/repeated_ptr_field.h | 1 - 4 files changed, 1868 deletions(-) delete mode 100644 src/google/protobuf/lazy_repeated_field.cc delete mode 100644 src/google/protobuf/lazy_repeated_field.h delete mode 100644 src/google/protobuf/lazy_repeated_field_heavy.cc diff --git a/src/google/protobuf/lazy_repeated_field.cc b/src/google/protobuf/lazy_repeated_field.cc deleted file mode 100644 index 4ea2a504cf549..0000000000000 --- a/src/google/protobuf/lazy_repeated_field.cc +++ /dev/null @@ -1,343 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2023 Google Inc. All rights reserved. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file or at -// https://developers.google.com/open-source/licenses/bsd - -#include "google/protobuf/lazy_repeated_field.h" - -#include -#include -#include -#include -#include -#include - -#include "absl/log/absl_check.h" -#include "absl/log/absl_log.h" -#include "absl/log/log.h" -#include "absl/strings/cord.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/string_view.h" -#include "absl/types/optional.h" -#include "google/protobuf/arena.h" -#include "google/protobuf/generated_message_util.h" -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/io/zero_copy_stream_impl_lite.h" -#include "google/protobuf/message_lite.h" -#include "google/protobuf/parse_context.h" -#include "google/protobuf/port.h" -#include "google/protobuf/repeated_ptr_field.h" - -// Must be included last. -// clang-format off -#include "google/protobuf/port_def.inc" -// clang-format on - -namespace google { -namespace protobuf { -namespace internal { -namespace {} // namespace - -namespace { - -inline const char* InternalParseRepeated(const char* ptr, - ParseContext* local_ctx, - RepeatedPtrFieldBase* value, - const MessageLite* prototype) { - uint32_t expected_tag; - ptr = ReadTag(ptr, &expected_tag); - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr; - // TODO: Try to optimize this. The tags and lengths are read again - // which is a bit wasteful. - return LazyRepeatedPtrField::ParseToRepeatedMessage( - ptr, local_ctx, prototype, expected_tag, value); -} - -template -inline bool ParseWithNullOuterContextImpl(const T& input, - RepeatedPtrFieldBase* value, - const MessageLite* prototype, - bool set_missing_required) { - // Null outer context means it's either already verified or unverified. - // - // If the payload is eagerly verified, the recursion limit was also verified - // and we don't need to repeat that. Also, users might have used a custom - // limit which is not known at this access. - // - // Unverified lazy fields may suffer from stack overflow with deeply nested - // data. We argue that it should be better than silent data corruption. - constexpr int kUnlimitedDepth = std::numeric_limits::max(); - const char* ptr; - ParseContext local_ctx(kUnlimitedDepth, false, &ptr, input); - - if (set_missing_required) { - local_ctx.SetParentMissingRequiredFields(); - } - // Unparsed data is already verified at parsing. Disable eager-verification. - (void)local_ctx.set_lazy_parse_mode(ParseContext::LazyParseMode::kLazy); - - ptr = InternalParseRepeated(ptr, &local_ctx, value, prototype); - return ptr != nullptr && - (local_ctx.EndedAtEndOfStream() || local_ctx.EndedAtLimit()); -} - -template -inline bool ParseWithOuterContextImpl(const T& input, ParseContext* ctx, - RepeatedPtrFieldBase* value, - const MessageLite* prototype, - bool set_missing_required) { - if (ctx == nullptr) { - return ParseWithNullOuterContextImpl(input, value, prototype, - set_missing_required); - } - - ABSL_DCHECK(!ctx->AliasingEnabled()); - // set_missing_required => ctx == nullptr - ABSL_DCHECK(!set_missing_required); - - // Create local context with depth. - const char* ptr; - ParseContext local_ctx(ParseContext::kSpawn, *ctx, &ptr, input); - - if (set_missing_required) { - local_ctx.SetParentMissingRequiredFields(); - } - if (ctx->lazy_parse_mode() == ParseContext::LazyParseMode::kEagerVerify) { - // Unparsed data is already verified at parsing. Disable eager-verification. - (void)local_ctx.set_lazy_parse_mode(ParseContext::LazyParseMode::kLazy); - } - - ptr = InternalParseRepeated(ptr, &local_ctx, value, prototype); - - if (local_ctx.missing_required_fields()) { - ctx->SetMissingRequiredFields(); - } - - return ptr != nullptr && - (local_ctx.EndedAtEndOfStream() || local_ctx.EndedAtLimit()); -} - -class ByPrototype { - public: - explicit ByPrototype(const MessageLite* prototype) : prototype_(prototype) {} - - MessageLite* New(Arena* arena) const { return prototype_->New(arena); } - - const MessageLite& Default() const { return *prototype_; } - - private: - const MessageLite* prototype_; -}; -} // namespace - -const RepeatedPtrFieldBase* LazyRepeatedPtrField::GetByPrototype( - const MessageLite* prototype, Arena* arena, ParseContext* ctx) const { - return GetGeneric(ByPrototype(prototype), arena, ctx); -} - -RepeatedPtrFieldBase* LazyRepeatedPtrField::MutableByPrototype( - const MessageLite* prototype, Arena* arena, ParseContext* ctx) { - return MutableGeneric(ByPrototype(prototype), arena, ctx); -} - -void LazyRepeatedPtrField::Clear() { - PerformTransition([](ExclusiveTxn& txn) { - auto* value = txn.mutable_value(); - if (value != nullptr) value->Clear>(); - return RawState::kCleared; - }); -} - -bool LazyRepeatedPtrField::IsEagerSerializeSafe(const MessageLite* prototype, - int32_t number, - Arena* arena) const { - // "prototype" may be null if it is for dynamic messages. This is ok as - // dynamic extensions won't be lazy as they lack verify functions any way. - if (prototype == nullptr) return false; - - for (;;) { - switch (GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: - case LogicalState::kDirty: - return true; - case LogicalState::kNoParseRequired: { - const auto* value = raw_.load(std::memory_order_relaxed).value(); - size_t tag_size = WireFormatLite::TagSize( - number, WireFormatLite::FieldType::TYPE_MESSAGE); - size_t total_size = tag_size * value->size(); - for (int i = 0; i < value->size(); i++) { - total_size += WireFormatLite::LengthDelimitedSize( - value->Get>(i).ByteSizeLong()); - } - return total_size == unparsed_.Size(); - } - case LogicalState::kParseRequired: { - GetByPrototype(prototype, arena); - break; // reswitch - } - } - } - // Required for certain compiler configurations. - ABSL_LOG(FATAL) << "Not reachable"; - return false; -} - -void LazyRepeatedPtrField::swap_atomics(std::atomic& lhs, - std::atomic& rhs) { - auto l = lhs.exchange(rhs.load(std::memory_order_relaxed), - std::memory_order_relaxed); - rhs.store(l, std::memory_order_relaxed); -} - -void LazyRepeatedPtrField::Swap(LazyRepeatedPtrField* lhs, Arena* lhs_arena, - LazyRepeatedPtrField* rhs, Arena* rhs_arena) { - static auto reallocate = [](LazyRepeatedPtrField* f, Arena* arena, - bool cleanup_old) { - auto raw = f->raw_.load(std::memory_order_relaxed); - if (raw.value() != nullptr) { - auto* new_value = Arena::Create(arena); - if (!raw.value()->empty()) { - new_value->MergeFrom(*raw.value()); - } - if (cleanup_old) { - delete reinterpret_cast*>( - raw.value()); - }; - raw.set_value(new_value); - f->raw_.store(raw, std::memory_order_relaxed); - } - auto old_unparsed = f->unparsed_; - f->unparsed_.Visit( - [] {}, - [&](auto& cord) { f->unparsed_.InitAsCord(arena, std::move(cord)); }, - [&](auto view) { - if (arena == nullptr) { - f->unparsed_.InitAsCord(arena, view); - } else { - f->unparsed_.InitAndSetArray(arena, view); - } - }); - if (cleanup_old) old_unparsed.Destroy(); - }; - static auto take_ownership = [](LazyRepeatedPtrField* f, Arena* arena) { - if (internal::DebugHardenForceCopyInSwap()) { - reallocate(f, arena, true); - } else { - arena->Own(reinterpret_cast*>( - f->raw_.load(std::memory_order_relaxed).mutable_value())); - f->unparsed_.TransferHeapOwnershipToArena(arena); - } - }; - - using std::swap; // Enable ADL with fallback - swap_atomics(lhs->raw_, rhs->raw_); - swap(lhs->unparsed_, rhs->unparsed_); - // At this point we are in a weird state. The messages have been swapped into - // their destination, but we have completely ignored the arenas, so the owning - // arena is actually on the opposite message. Now we straighten out our - // ownership by forcing reallocations/ownership changes as needed. - if (lhs_arena == rhs_arena) { - if (internal::DebugHardenForceCopyInSwap() && lhs_arena == nullptr) { - reallocate(lhs, lhs_arena, true); - reallocate(rhs, rhs_arena, true); - } - } else { - if (lhs_arena == nullptr) { - take_ownership(rhs, rhs_arena); - reallocate(lhs, lhs_arena, false); - } else if (rhs_arena == nullptr) { - take_ownership(lhs, lhs_arena); - reallocate(rhs, rhs_arena, false); - } else { - reallocate(lhs, lhs_arena, false); - reallocate(rhs, rhs_arena, false); - } - } -} - -void LazyRepeatedPtrField::InternalSwap( - LazyRepeatedPtrField* PROTOBUF_RESTRICT lhs, - LazyRepeatedPtrField* PROTOBUF_RESTRICT rhs) { - using std::swap; // Enable ADL with fallback - swap_atomics(lhs->raw_, rhs->raw_); - swap(lhs->unparsed_, rhs->unparsed_); -} - -bool LazyRepeatedPtrField::ParseWithOuterContext(RepeatedPtrFieldBase* value, - const absl::Cord& input, - ParseContext* ctx, - const MessageLite* prototype, - bool set_missing_required) { - absl::optional flat = input.TryFlat(); - if (flat.has_value()) { - return ParseWithOuterContextImpl(*flat, ctx, value, prototype, - set_missing_required); - } - - io::CordInputStream cis(&input); - return ParseWithOuterContextImpl(&cis, ctx, value, prototype, - set_missing_required); -} - -bool LazyRepeatedPtrField::ParseWithOuterContext(RepeatedPtrFieldBase* value, - absl::string_view input, - ParseContext* ctx, - const MessageLite* prototype, - bool set_missing_required) { - return ParseWithOuterContextImpl(input, ctx, value, prototype, - set_missing_required); -} - -size_t LazyRepeatedPtrField::ByteSizeLong(size_t tag_size) const { - switch (GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: - case LogicalState::kNoParseRequired: - case LogicalState::kParseRequired: - return unparsed_.Size(); - - case LogicalState::kDirty: - const auto* value = raw_.load(std::memory_order_relaxed).value(); - size_t total_size = tag_size * value->size(); - for (int i = 0; i < value->size(); i++) { - total_size += WireFormatLite::LengthDelimitedSize( - value->Get>(i).ByteSizeLong()); - } - return total_size; - } - // Required for certain compiler configurations. - ABSL_LOG(FATAL) << "Not reachable"; - return -1; -} - -void LazyRepeatedPtrField::LogParseError(const RepeatedPtrFieldBase* value) { - const MessageLite* message = - &value->at>(0); - auto get_error_string = [&value]() { - std::string str; - for (int i = 0; i < value->size(); i++) { - absl::StrAppend(&str, "[", i, "]: ", - value->at>(i) - .InitializationErrorString(), - "\n"); - } - return str; - }; -#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) - // In fuzzing mode, we log less to speed up fuzzing. - ABSL_LOG_EVERY_N(INFO, 100000) -#else - ABSL_LOG_EVERY_N_SEC(INFO, 1) -#endif - << "Lazy parsing failed for RepeatedPtrField<" << message->GetTypeName() - << "> error=" << get_error_string() << " (N = " << COUNTER << ")"; -} - -} // namespace internal -} // namespace protobuf -} // namespace google - -#include "google/protobuf/port_undef.inc" diff --git a/src/google/protobuf/lazy_repeated_field.h b/src/google/protobuf/lazy_repeated_field.h deleted file mode 100644 index 34838bdfd22e1..0000000000000 --- a/src/google/protobuf/lazy_repeated_field.h +++ /dev/null @@ -1,1123 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2023 Google Inc. All rights reserved. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file or at -// https://developers.google.com/open-source/licenses/bsd - -#ifndef GOOGLE_PROTOBUF_LAZY_REPEATED_FIELD_H__ -#define GOOGLE_PROTOBUF_LAZY_REPEATED_FIELD_H__ - -#include -#include -#include -#include -#include -#include -#include - -#include "absl/base/attributes.h" -#include "absl/log/absl_check.h" -#include "absl/strings/cord.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/string_view.h" -#include "google/protobuf/arena.h" -#include "google/protobuf/generated_message_util.h" -#include "google/protobuf/internal_visibility.h" -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/message_lite.h" -#include "google/protobuf/parse_context.h" -#include "google/protobuf/port.h" -#include "google/protobuf/raw_ptr.h" -#include "google/protobuf/repeated_ptr_field.h" -#include "google/protobuf/wire_format_verify.h" - -#ifdef SWIG -#error "You cannot SWIG proto headers" -#endif - -// must be last -#include "google/protobuf/port_def.inc" - -namespace google { -namespace protobuf { - -class Descriptor; -namespace io { -class CodedInputStream; -class CodedOutputStream; -} // namespace io -} // namespace protobuf -} // namespace google - -namespace google { -namespace protobuf { -namespace internal { - -inline const char* ReadTagInternal(const char* ptr, uint8_t* tag) { - *tag = UnalignedLoad(ptr); - return ptr + sizeof(uint8_t); -} - -inline const char* ReadTagInternal(const char* ptr, uint16_t* tag) { - *tag = UnalignedLoad(ptr); - return ptr + sizeof(uint16_t); -} - -inline const char* ReadTagInternal(const char* ptr, uint32_t* tag) { - return ReadTag(ptr, tag); -} - -template -inline size_t TagSizeInternal(TagType tag); -template <> -inline size_t TagSizeInternal(uint8_t tag) { - return sizeof(uint8_t); -} -template <> -inline size_t TagSizeInternal(uint16_t tag) { - return sizeof(uint16_t); -} -template <> -inline size_t TagSizeInternal(uint32_t tag) { - return io::CodedOutputStream::VarintSize32(tag); -} - -// This class is used to represent lazily-loaded repeated message fields. -// It stores the field in a raw buffer or a Cord initially, and then parses that -// on-demand if a caller asks for the RepeatedPtrField object. -// -// As with most protobuf classes, const methods of this class are safe to call -// from multiple threads at once, but non-const methods may only be called when -// the thread has guaranteed that it has exclusive access to the field. -class LazyRepeatedPtrField { - public: - constexpr LazyRepeatedPtrField() : raw_(MessageState(RawState::kCleared)) {} - LazyRepeatedPtrField(const LazyRepeatedPtrField& rhs) - : LazyRepeatedPtrField(nullptr, rhs, nullptr) {} - - // Arena enabled constructors. - LazyRepeatedPtrField(internal::InternalVisibility, Arena* arena) - : LazyRepeatedPtrField(arena) {} - LazyRepeatedPtrField(internal::InternalVisibility, Arena* arena, - const LazyRepeatedPtrField& rhs, Arena* rhs_arena) - : LazyRepeatedPtrField(arena, rhs, rhs_arena) {} - - // TODO: make this constructor private - explicit constexpr LazyRepeatedPtrField(Arena*) - : raw_(MessageState(RawState::kCleared)) {} - - LazyRepeatedPtrField& operator=(const LazyRepeatedPtrField&) = delete; - - ~LazyRepeatedPtrField(); - - bool IsClear() const { - auto state = GetLogicalState(); - return state == LogicalState::kClear || - state == LogicalState::kClearExposed; - } - - // Get and Mutable trigger parsing. - template - const RepeatedPtrField& Get(const Element* default_instance, - Arena* arena) const { - return *reinterpret_cast*>( - GetGeneric(ByTemplate(default_instance), arena, nullptr)); - } - - template - RepeatedPtrField* Mutable(const Element* default_instance, - Arena* arena) { - return reinterpret_cast*>( - MutableGeneric(ByTemplate(default_instance), arena, nullptr)); - } - - bool IsInitialized(const MessageLite* prototype, Arena* arena) const { - switch (GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: { - return true; - } - case LogicalState::kParseRequired: - case LogicalState::kNoParseRequired: { - // Returns true if "unparsed" is not verified to be (maybe) - // uninitialized. Otherwise, falls through to next cases to eagerly - // parse message and call IsInitialized(). - if (!MaybeUninitialized()) return true; - } - ABSL_FALLTHROUGH_INTENDED; - case LogicalState::kDirty: { - const auto& value = *GetByPrototype(prototype, arena); - for (int i = 0; i < value.size(); ++i) { - if (!value.Get>(i).IsInitialized()) - return false; - } - return true; - } - default: - __builtin_unreachable(); - } - } - - // Dynamic versions of basic accessors. - const RepeatedPtrFieldBase* GetDynamic(const Descriptor* type, - MessageFactory* factory, - Arena* arena) const; - RepeatedPtrFieldBase* MutableDynamic(const Descriptor* type, - MessageFactory* factory, Arena* arena); - - // Basic accessors that use a default instance to create the message. - const RepeatedPtrFieldBase* GetByPrototype(const MessageLite* prototype, - Arena* arena, - ParseContext* ctx = nullptr) const; - RepeatedPtrFieldBase* MutableByPrototype(const MessageLite* prototype, - Arena* arena, - ParseContext* ctx = nullptr); - - void Clear(); - - // Updates state such that state set in other overwrites this. - // - // Internal Lazy state transitions are updated as such: - // - // src\dest | UNINIT | INIT | DIRTY | CLEAR | ERROR - // :------- | :----: | :---: | :---: | :-----------: | :---: - // UNINIT | DIRTY | DIRTY | DIRTY | UNINIT/DIRTY* | DIRTY - // INIT | DIRTY | DIRTY | DIRTY | UNINIT/DIRTY* | UNDEF - // DIRTY | DIRTY | DIRTY | DIRTY | UNINIT/DIRTY* | UNDEF - // CLEAR | UNINIT | INIT | DIRTY | CLEAR | UNDEF - // ERROR | DIRTY | DIRTY | DIRTY | DIRTY | DIRTY - // * Depends on if clear was initialized before. - // TODO: The state after ERROR should be DIRTY. Also need to make the - // change for LazyField. - void MergeFrom(const MessageLite* prototype, - const LazyRepeatedPtrField& other, Arena* arena, - Arena* other_arena); - - static void Swap(LazyRepeatedPtrField* lhs, Arena* lhs_arena, - LazyRepeatedPtrField* rhs, Arena* rhs_arena); - static void InternalSwap(LazyRepeatedPtrField* lhs, - LazyRepeatedPtrField* rhs); - - const RepeatedPtrFieldBase* TryGetRepeated() const; - - // Returns true when the lazy field has data that have not yet parsed. - // (i.e. parsing has been deferred) Once parsing has been attempted, this - // returns false. Note that the LazyField object may still contain - // the raw unparsed data with parsing errors. - bool HasUnparsed() const { - return GetLogicalState() == LogicalState::kParseRequired; - } - - // Returns true if parsing has been attempted and it failed. - bool HasParsingError() const { - auto raw = raw_.load(std::memory_order_relaxed); - return raw.status() == RawState::kParseError; - } - - // APIs that will be used by table-driven parsing. - // - // `TagType` is passed from table-driven parser. On fast path it's uint8 or - // uint16; on slow path it's uint32. - template - const char* _InternalParse(const MessageLite* prototype, Arena* arena, - const char* ptr, ParseContext* ctx, - TagType expected_tag) { - // If this message is eagerly-verified lazy, kEager mode likely suggests - // that previous verification has failed and we fall back to eager-parsing - // (either to initialize the message to match eager field or to fix false - // errors. - // - // Lazy parsing does not support aliasing and may result in data copying. - // It seems prudent to honor aliasing to avoid any observable gaps between - // lazy and eager parsing. - if (ctx->lazy_parse_mode() == ParseContext::kEager || - ctx->AliasingEnabled()) { - auto* value = MutableByPrototype(prototype, arena, ctx); - ptr = ParseToRepeatedMessage(ptr, ctx, prototype, expected_tag, - value); - return ptr; - } - - switch (GetLogicalState()) { - case LogicalState::kParseRequired: { - return ParseToCord(ptr, ctx, prototype, arena, expected_tag); - } break; - - case LogicalState::kClear: { - // Clear/Fresh have empty unparsed data; so this is the equivalent - // of setting it to the passed in bytes. - return ParseToCord(ptr, ctx, prototype, arena, expected_tag); - } break; - - // Pointers exposed. - case LogicalState::kClearExposed: - case LogicalState::kNoParseRequired: - case LogicalState::kDirty: { - PerformTransition([&](ExclusiveTxn& txn) { - auto* value = txn.mutable_value(); - ptr = ParseToRepeatedMessage(ptr, ctx, prototype, - expected_tag, value); - return RawState::kIsParsed; - }); - return ptr; - } - } - // Required for certain compiler configurations. - internal::Unreachable(); - return nullptr; - } - - template - const char* _InternalParseVerify(const MessageLite* prototype, Arena* arena, - const char* ptr, ParseContext* ctx, - TagType expected_tag, - absl::string_view data) { - ABSL_DCHECK(ptr != nullptr); - if (ctx->lazy_parse_mode() == ParseContext::kLazy || - ctx->lazy_eager_verify_func() == nullptr) { - return ptr; - } - VerifyResult res = WireFormatVerifyView(data, ctx); - if (res.verified) { - if (res.missing_required_fields) { - // Unparsed data may be uninitialized and need to be parsed to be sure. - SetNeedsParseMaybeUninitialized(); - } - return ptr; - } - - // Try eager parsing on potentially malformed wire in case the eager parsing - // fixes the issue. For example, a negative int32 encoded as 5B varint can - // be parsed correctly. - // - // Should preserve the old parsing mode because we don't want to - // unnecessarily eager-parse other parts of message tree. This can be - // especially inefficient if the eager verification results in false - // positive errors. - ParseContext::LazyParseMode old = - ctx->set_lazy_parse_mode(ParseContext::kEager); - (void)GetByPrototype(prototype, arena, ctx); - - // If eager parsing still fails, don't bother restoring the parse mode. - if (HasParsingError()) return nullptr; - - // Unverified lazy fields may miss parsing errors on eager parsing. If it's - // certain, just mark error and return. - if (!ctx->treat_eager_parsing_errors_as_errors()) { - auto raw = raw_.load(std::memory_order_relaxed); - raw.set_status(RawState::kParseError); - raw_.store(raw, std::memory_order_relaxed); - ABSL_DCHECK(HasParsingError()); - return nullptr; - } - - // We need to transition to dirty to prefer eager serialization as the - // unparsed_ has non-canonical wire format. - (void)MutableByPrototype(prototype, arena); - - (void)ctx->set_lazy_parse_mode(old); - return ptr; - } - - template - static const char* ParseToRepeatedMessage(const char* ptr, ParseContext* ctx, - const MessageLite* prototype, - TagType expected_tag, - RepeatedPtrFieldBase* value) { - const char* ptr2 = ptr; - TagType next_tag; - do { - MessageLite* submsg = value->AddMessage(prototype); - // ptr2 points to the start of the element's encoded length. - ptr = ctx->ParseMessage(submsg, ptr2); - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr; - if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) { - if (ctx->Done(&ptr)) { - break; - } - } - ptr2 = ReadTagInternal(ptr, &next_tag); - if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) return nullptr; - } while (next_tag == expected_tag); - return ptr; - } - - template - const char* ParseToCord(const char* ptr, ParseContext* ctx, - const MessageLite* prototype, Arena* arena, - TagType expected_tag) { - // ptr2 points to the start of the encoded length. - const char* ptr2 = ptr; - TagType next_tag; - // Move ptr back to the start of the tag. - size_t tag_size = TagSizeInternal(expected_tag); - ptr -= tag_size; - if (ctx->parent_missing_required_fields()) { - SetNeedsParseMaybeUninitialized(); - } else { - SetNeedsParse(); - } - do { - std::string tmp; - // Append the tag. - tmp.append(absl::string_view(ptr, ptr2 - ptr)); - size_t taglen_size; - ptr = ctx->ParseLengthDelimitedInlined( - ptr2, [&tmp, &taglen_size, ctx, ptr2](const char* p) { - // At this moment length is read and p points to the start of - // the payload. - ABSL_DCHECK(p - ptr2 > 0 && p - ptr2 <= 5) << p - ptr2; - // Append the length. - tmp.append(absl::string_view(ptr2, p - ptr2)); - taglen_size = tmp.size(); - return ctx->AppendString(p, &tmp); - }); - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr; - const auto tmp_size = tmp.size(); - ABSL_DCHECK_GE(tmp_size, taglen_size); - if (unparsed_.IsCord()) { - unparsed_.AsCord().Append(tmp); - } else if (arena != nullptr && - unparsed_.Size() + tmp_size <= kMaxArraySize) { - if (unparsed_.IsEmpty()) { - unparsed_.InitAsArray(arena, 0); - } - unparsed_.AppendToArray(tmp); - } else { - unparsed_.UpgradeToCord(arena).Append(tmp); - } - if (tmp_size > taglen_size) { - ptr = _InternalParseVerify( - prototype, arena, ptr, ctx, expected_tag, - absl::string_view(tmp.data() + taglen_size, - tmp_size - taglen_size)); - if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr; - } - if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) { - // `Done` advances the stream to the next buffer chunk. - if (ctx->Done(&ptr)) { - break; - } - } - // ptr points to the start of the next tag. - ptr2 = ReadTagInternal(ptr, &next_tag); - // ptr2 points to the start of the next element's encoded length. - - // TODO: Try to remove the following condition for 8 and 16 bits - // TagType. - if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) return nullptr; - } while (next_tag == expected_tag); - if (unparsed_.IsArray()) { - unparsed_.ZeroOutTailingBytes(); - } - return ptr; - } - - uint8_t* InternalWrite(const MessageLite* prototype, int32_t number, - uint8_t* target, - io::EpsCopyOutputStream* stream) const; - - // ByteSize of the repeated ptr field (including the varints of tags and - // lengths). - size_t ByteSizeLong(size_t tag_size) const; - size_t SpaceUsedExcludingSelfLong() const; - - // LogicalState combines the `raw_` and `unparsed_` fields to produce the - // current state. - // - // This separation allows more easily adding fine-grained states w/o touching - // std::atomics; most state transitions are in a write context and do not - // require subtle atomicity. - // TODO: Deduplicate with LazyField. - enum class LogicalState { - // The serialized data is available and unparsed. - // (kParseRequired, !unparsed.empty(), message = undefined). - kParseRequired, - // The message has been parsed from the serialized data. - // (kIsParsed, !unparsed.empty(), message != nullptr). - kNoParseRequired, - // The field is clear (freshly constructed or cleared): - // - (kCleared, unparsed.empty(), message = nullptr) - kClear, - // The field is clear but previously exposed a pointer. - // - (kCleared, unparsed.empty(), message = !nullptr) - kClearExposed, - // A write operation was done after a parse. - // (kIsParsed, unparsed.empty(), message != nullptr) - kDirty, - }; - LogicalState GetLogicalState() const { - auto raw = raw_.load(std::memory_order_acquire); - switch (raw.status()) { - case RawState::kParseError: - ABSL_DCHECK_NE(raw.value(), nullptr); - return LogicalState::kDirty; - case RawState::kCleared: - ABSL_DCHECK(unparsed_.IsEmpty()); - ABSL_DCHECK(raw.value() == nullptr || raw.value()->empty()) - << (raw.value() == nullptr - ? "nullptr" - : absl::StrCat("non-empty:", raw.value()->size())); - return raw.value() == nullptr ? LogicalState::kClear - : LogicalState::kClearExposed; - case RawState::kNeedsParse: - case RawState::kNeedsParseMaybeUninitialized: - // There is no SetEncoded, so unparsed_ is always from _InternalParse, - // which can't be empty. - ABSL_DCHECK(!unparsed_.IsEmpty()); - ABSL_DCHECK(raw.value() == nullptr || raw.value()->empty()); - return LogicalState::kParseRequired; - default: - ABSL_DCHECK(raw.status() == RawState::kIsParsed || - raw.status() == RawState::kIsParsedMaybeUninitialized); - ABSL_DCHECK(raw.value() != nullptr); - // Only other Initialized state was kParseError which is handled above. - if (unparsed_.IsEmpty()) { - return LogicalState::kDirty; - } - // Non-null message, unparsed exists. - return LogicalState::kNoParseRequired; - } - } - - private: - // Values that can be kept in `MessageState`'s status bits. - // TODO: Deduplicate with LazyField. - enum class RawState { - // `unparsed_` is empty. - // `message_` is either nullptr or an empty container. - kCleared, - - // `unparsed_` contains the canonical field data. - // `message_` points to the result of parsing that data. - // - // NOTE: serializing `message_` may produce different bytes than - // `unparsed_`, so care must be taken around issues of canonical or - // deterministic serialization. Generally, `unparsed_` should be preferred - // if it is not empty, as that is lower overhead. - kIsParsed, - - // IsParsed and may be uninitialized. See - // kNeedsParseMaybeUninitialized for details. - kIsParsedMaybeUninitialized, - - // TODO: add kIsParsedIgnoreUnparsed and - // kIsParsedIgnoreUnparsedMaybeUninitialized. - - // `message_` points to the result of parsing that data, but there was an - // error when parsing. Partially parsed `message_` is considered canonical - // to match eager fields. - kParseError, - - // `unparsed_` contains the field data. - // `message_` is either nullptr or an empty container. - kNeedsParse, - - // kNeedsParse and may be uninitialized. - // - // MaybeUninitialized is flagged in the verification and recorded to trigger - // eager parsing on IsInitialized() to be certain. - // - // Note that unverified data is assumed to be initialized (to support legacy - // cases) and treated as if it's verified to be initialized. Therefore, we - // need "MaybeUninitialized" rather than "Initialized". - kNeedsParseMaybeUninitialized, - - kMaxState = kNeedsParseMaybeUninitialized - }; - - class MessageState { - public: - constexpr explicit MessageState(RawState state) : raw_(ToUint32(state)) {} - MessageState(const RepeatedPtrFieldBase* message, RawState state) - : raw_(reinterpret_cast(message) | ToUint32(state)) { - ABSL_DCHECK_EQ(reinterpret_cast(message) & ToUint32(state), - 0u); - } - - const RepeatedPtrFieldBase* value() const { - return reinterpret_cast(raw_ & ~0b111); - } - - RepeatedPtrFieldBase* mutable_value() const { - return reinterpret_cast(raw_ & ~0b111); - } - - RawState status() const { return ToRawState(raw_ & 0b111); } - - void set_status(RawState status) { - raw_ &= ~0b111; - raw_ |= ToUint32(status); - } - - void set_value(const RepeatedPtrFieldBase* message) { - raw_ &= 0b111; - raw_ |= reinterpret_cast(message); - } - - static inline constexpr uint32_t ToUint32(RawState status) { - return static_cast(status); - } - static inline RawState ToRawState(uint32_t status) { - ABSL_DCHECK_LE(status, ToUint32(RawState::kMaxState)); - return static_cast(status); - } - - bool NeedsParse() const { - // kNeedsParse and kNeedsParseMaybeUninitialized must be 0 and 1 to make - // NeedsParse() check cheap. - static_assert( - RawState::kNeedsParseMaybeUninitialized == RawState::kMaxState, ""); - static_assert(ToUint32(RawState::kNeedsParseMaybeUninitialized) == - ToUint32(RawState::kNeedsParse) + 1, - ""); - return status() >= RawState::kNeedsParse; - } - - private: - uintptr_t raw_; - }; - - // TODO: Deduplicate. - template - class ByTemplate { - public: - // Only `Get()` needs access to the default element, but we don't want to - // force instantiation of `MessageType::default_instance()` because it - // doesn't exist in all configurations. - explicit ByTemplate() : ByTemplate(nullptr) {} - explicit ByTemplate(const MessageType* default_instance) - : default_instance_(default_instance) {} - - MessageLite* New(Arena* arena) const { - return reinterpret_cast( - Arena::DefaultConstruct(arena)); - } - - const MessageLite& Default() const { - ABSL_DCHECK(default_instance_ != nullptr); - return *reinterpret_cast(default_instance_); - } - - private: - const MessageType* default_instance_; - }; - - // Copy constructor on arena. - LazyRepeatedPtrField(Arena* arena, const LazyRepeatedPtrField& rhs, - Arena* rhs_arena); - - // Serialization methods. Note that WriteToCord may override/clear the - // given cord. - template - bool MergeFrom(const MessageLite* prototype, const Input& data, Arena* arena); - - private: - template - MessageState SharedInit(Strategy strategy, Arena* arena, - ParseContext* ctx) const { - auto old_raw = raw_.load(std::memory_order_acquire); - if (!old_raw.NeedsParse()) return old_raw; - MessageState new_raw = - // Transfer MaybeUninitialized state after a state transition. - DoParse(nullptr, strategy.Default(), arena, ctx, - old_raw.status() == RawState::kNeedsParseMaybeUninitialized); - if (raw_.compare_exchange_strong(old_raw, new_raw, - std::memory_order_release, - std::memory_order_acquire)) { - // We won the race. Dispose of the old message (if there was one). - if (arena == nullptr) { - delete reinterpret_cast*>( - old_raw.value()); - } - return new_raw; - } else { - // We lost the race, but someone else will have installed the new - // value. Dispose of the our attempt at installing. - if (arena == nullptr) { - delete reinterpret_cast*>( - new_raw.value()); - } - ABSL_DCHECK(!old_raw.NeedsParse()); - return old_raw; - } - } - - template - MessageState ExclusiveInitWithoutStore(Strategy strategy, Arena* arena, - ParseContext* ctx) { - auto old_raw = raw_.load(std::memory_order_relaxed); - if (!old_raw.NeedsParse() && old_raw.value() != nullptr) return old_raw; - if (old_raw.NeedsParse()) { - // Mutable messages need not transfer MaybeUninitialized. - return DoParse(old_raw.mutable_value(), strategy.Default(), arena, ctx, - false); - } - ABSL_DCHECK(old_raw.value() == nullptr); - return MessageState(Arena::Create(arena), - RawState::kIsParsed); - } - - template - const RepeatedPtrFieldBase* GetGeneric(Strategy strategy, Arena* arena, - ParseContext* ctx) const { - const auto* value = SharedInit(strategy, arena, ctx).value(); - if (value == nullptr) { - return reinterpret_cast(DefaultRawPtr()); - } - return value; - } - - template - RepeatedPtrFieldBase* MutableGeneric(Strategy strategy, Arena* arena, - ParseContext* ctx) { - auto raw = ExclusiveInitWithoutStore(strategy, arena, ctx); - unparsed_.Clear(); - ABSL_DCHECK(raw.value() != nullptr); - raw.set_status(RawState::kIsParsed); - raw_.store(raw, std::memory_order_relaxed); - return raw.mutable_value(); - } - - void SetNeedsParse() { - auto raw = raw_.load(std::memory_order_relaxed); - raw.set_status(RawState::kNeedsParse); - raw_.store(raw, std::memory_order_relaxed); - } - - void SetNeedsParseMaybeUninitialized() { - auto raw = raw_.load(std::memory_order_relaxed); - ABSL_DCHECK(raw.status() == RawState::kNeedsParse || - raw.status() == RawState::kNeedsParseMaybeUninitialized); - raw.set_status(RawState::kNeedsParseMaybeUninitialized); - raw_.store(raw, std::memory_order_relaxed); - } - - void SetParseNotRequiredMaybeUninitialized() { - auto raw = raw_.load(std::memory_order_relaxed); - ABSL_DCHECK(raw.status() == RawState::kIsParsed || - raw.status() == RawState::kIsParsedMaybeUninitialized); - raw.set_status(RawState::kIsParsedMaybeUninitialized); - raw_.store(raw, std::memory_order_relaxed); - } - - bool MaybeUninitialized() const { - auto raw = raw_.load(std::memory_order_relaxed); - if (raw.status() == RawState::kNeedsParseMaybeUninitialized) return true; - - // Make sure the logical state matches as well. - return raw.status() == RawState::kIsParsedMaybeUninitialized && - GetLogicalState() == LogicalState::kNoParseRequired; - } - - // Adds MaybeUninitialized state if "other" may be uninitialized. - void MergeMaybeUninitializedState(const LazyRepeatedPtrField& other); - - bool IsEagerSerializeSafe(const MessageLite* prototype, int32_t number, - Arena* arena) const; - - static void swap_atomics(std::atomic& lhs, - std::atomic& rhs); - - // Helper to enforce invariants when exclusive R/M/W access is required. - class ExclusiveTxn { - public: - explicit ExclusiveTxn(LazyRepeatedPtrField& lazy) - : lazy_(lazy), state_(lazy_.raw_.load(std::memory_order_relaxed)) {} - - RepeatedPtrFieldBase* mutable_value() { - // Any write to the message at this point should nuke unparsed_. - lazy_.unparsed_.Clear(); - return state_.mutable_value(); - } - - void Commit(RawState new_status) { - if (state_.status() != new_status) { - state_.set_status(new_status); - lazy_.raw_.store(state_, std::memory_order_relaxed); - } - } - - private: - LazyRepeatedPtrField& lazy_; - MessageState state_; - }; - - template - RawState PerformTransition(Transition fn) { - ExclusiveTxn txn(*this); - RawState new_state = fn(txn); - txn.Commit(new_state); - return new_state; - } - - public: - // Payload abstraction that can hold a raw char array or a Cord depending on - // how much data it needs to hold. - // The caller is responsible for managing the lifetime of the payload. - // TODO: Deduplicate with the LazyField::UnparsedPayload. - class UnparsedPayload { - enum Tag : uintptr_t { - kTagEmpty = 0, - kTagArray = 1, - kTagCord = 2, - - kTagBits = 3, - kRemoveMask = ~kTagBits, - }; - - public: - using ArraySizeType = uint16_t; - - // Visit the payload and calls the respective callback. The signatures are: - // - () for kUnset - // - (Cord&) for kCord - // - (absl::string_view) for kArray - // Returns the value returned by the callback. - template - auto Visit(UnsetF unset_f, CordF cord_f, ViewF view_f) const { - Tag t = tag(); - // Using ternary to allow for common-type implicit conversions. - return t == kTagEmpty ? unset_f() - : t == kTagArray ? view_f(AsStringView()) - : cord_f(AsCord()); - } - - Tag tag() const { return static_cast(value_ & kTagBits); } - - bool IsCord() const { - ABSL_DCHECK_EQ(static_cast(value_ & kTagCord), - static_cast(tag() == kTagCord)); - return (value_ & kTagCord) != 0u; - } - - bool IsArray() const { - ABSL_DCHECK_EQ(static_cast(value_ & kTagArray), - static_cast(tag() == kTagArray)); - return (value_ & kTagArray) != 0u; - } - - // Requires: IsCord() - absl::Cord& AsCord() const { - ABSL_DCHECK(IsCord()); - return *reinterpret_cast(value_ & kRemoveMask); - } - - // Return the payload as Cord regardless of the existing storage. - absl::Cord ForceAsCord() const { - return Visit([] { return absl::Cord(); }, // - [](const auto& c) { return c; }, - [](auto view) { return absl::Cord(view); }); - } - - // Similar to AsCord(), but if the payload is not already a Cord it will - // convert it first, maintaining existing bytes. - absl::Cord& UpgradeToCord(Arena* arena) { - if (IsCord()) return AsCord(); - absl::Cord new_cord(AsStringView()); - return InitAsCord(arena, std::move(new_cord)); - } - - // Requires: input array is the untagged value. - ArraySizeType GetArraySize(const char* array) const { - ABSL_DCHECK_EQ(array, reinterpret_cast(value_ - kTagArray)); - ArraySizeType size; - memcpy(&size, array, sizeof(size)); - return size; - } - - void SetArraySize(void* array, ArraySizeType size) const { - ABSL_DCHECK_EQ(array, reinterpret_cast(value_ - kTagArray)); - memcpy(array, &size, sizeof(ArraySizeType)); - } - - void SetArraySize(ArraySizeType size) const { - void* array = reinterpret_cast(value_ - kTagArray); - memcpy(array, &size, sizeof(ArraySizeType)); - } - - // Requires: !IsCord() - absl::string_view AsStringView() const { - switch (tag()) { - case kTagEmpty: - return {}; - - case kTagArray: { - const char* array = reinterpret_cast(value_ - kTagArray); - auto size = GetArraySize(array); - return absl::string_view(array + sizeof(size), size); - } - - default: - Unreachable(); - } - } - - // Clear the payload. After this call `Size()==0` and `IsEmpty()==true`, but - // it is not necessarily true that `tag()==kTagEmpty`. - // In particular, it keeps the Cord around in case it needs to be reused. - void Clear() { - switch (tag()) { - case kTagEmpty: - case kTagArray: - value_ = 0; - break; - default: - AsCord().Clear(); - break; - } - } - - // Destroys allocated memory if necessary. Does not reset the object. - void Destroy() { - if (IsCord()) delete &AsCord(); - } - - bool IsEmpty() const { - return Visit([] { return true; }, - [](const auto& cord) { return cord.empty(); }, - [](auto view) { - ABSL_DCHECK(!view.empty()); - return false; - }); - } - - size_t Size() const { - return Visit([] { return 0; }, - [](const auto& cord) { return cord.size(); }, - [](auto view) { return view.size(); }); - } - - // Sets the currently value as a Cord constructed from `args...`. - // It will clean up the existing value if necessary. - template - void SetCord(Arena* arena, Arg&& arg) { - if (IsCord()) { - // Reuse the existing cord. - AsCord() = std::forward(arg); - } else { - absl::Cord* cord = - Arena::Create(arena, std::forward(arg)); - value_ = reinterpret_cast(cord) | kTagCord; - } - } - - // Initialize the value as a Cord constructed from `args...` - // Ignores existing value. - template - absl::Cord& InitAsCord(Arena* arena, Args&&... args) { - auto* cord = - Arena::Create(arena, std::forward(args)...); - value_ = reinterpret_cast(cord) | kTagCord; - return *cord; - } - - // Initialize the value as an array copied from `view`. The tailing bytes - // are set to 0 to avoid UB. - // Ignores existing value. - void InitAndSetArray(Arena* arena, absl::string_view view) { - char* array = InitAsArray(arena, view.size()); - memcpy(array, view.data(), view.size()); - if (view.size() < kMaxArraySize) { - // Memset uninit data to avoid UB later. - memset(array + view.size(), '\0', kMaxArraySize - view.size()); - } - ABSL_DCHECK_EQ(view, AsStringView()); - } - - // Initialize the value as an array copied from `cord`. The tailing bytes - // are set to 0 to avoid UB. - // Ignores existing value. - void InitAndSetArray(Arena* arena, const absl::Cord& cord) { - auto size = cord.size(); - char* array = InitAsArray(arena, size); - cord.CopyToArray(array); - if (size < kMaxArraySize) { - // Memset uninit data to avoid UB later. - memset(array + size, '\0', kMaxArraySize - size); - } - } - - // Initialize the value as an array of size `size`. The payload bytes are - // uninitialized. - // Ignores existing value. - char* InitAsArray(Arena* arena, ArraySizeType size) { - ABSL_DCHECK(arena != nullptr); - // Allocate max allowed capacity. - // TODO: improve this to reduce waste when the size is small. - void* c = arena->AllocateAligned(kMaxArraySize + sizeof(ArraySizeType)); - ABSL_DCHECK_EQ(reinterpret_cast(c) & kTagBits, uintptr_t{0}); - value_ = reinterpret_cast(c) | kTagArray; - SetArraySize(c, size); - return static_cast(c) + sizeof(ArraySizeType); - } - - void AppendToArray(absl::string_view view) { - char* array = reinterpret_cast(value_ - kTagArray); - ArraySizeType size = GetArraySize(array); - char* c = array + sizeof(size) + size; - size += view.size(); - SetArraySize(array, size); - memcpy(c, view.data(), view.size()); - } - - void ZeroOutTailingBytes() { - char* array = reinterpret_cast(value_ - kTagArray); - auto size = GetArraySize(array); - if (size < kMaxArraySize) { - memset(array + sizeof(ArraySizeType) + size, '\0', - kMaxArraySize - size); - } - } - - size_t SpaceUsedExcludingSelf() const { - return Visit( - [] { return 0; }, - [](const auto& cord) { return cord.EstimatedMemoryUsage(); }, - [](auto view) { return kMaxArraySize + sizeof(ArraySizeType); }); - } - - void TransferHeapOwnershipToArena(Arena* arena) { - ABSL_DCHECK(tag() == kTagCord || tag() == kTagEmpty); - if (IsCord()) arena->Own(&AsCord()); - } - - private: - uintptr_t value_ = 0; - }; - - public: - static bool ParseWithOuterContext(RepeatedPtrFieldBase* value, - const absl::Cord& input, ParseContext* ctx, - const MessageLite* prototype, - bool set_missing_required); - static bool ParseWithOuterContext(RepeatedPtrFieldBase* value, - absl::string_view input, ParseContext* ctx, - const MessageLite* prototype, - bool set_missing_required); - - private: - // This method has to be below the definition of class UnparsedPayload due to - // the call to `unparsed_.Visit`. - // TODO: Deduplicate with LazyField. - MessageState DoParse(RepeatedPtrFieldBase* old, const MessageLite& prototype, - Arena* arena, ParseContext* ctx, - bool maybe_uninitialized) const { - auto* value = - (old == nullptr) ? Arena::Create(arena) : old; - if (!unparsed_.Visit( - [] { return true; }, - [&](const auto& cord) { - return ParseWithOuterContext(value, cord, ctx, &prototype, - maybe_uninitialized); - }, - [&](auto view) { - return ParseWithOuterContext(value, view, ctx, &prototype, - maybe_uninitialized); - })) { - // If this is called by eager verification, ctx != nullptr and logging - // parsing error in that case is likely redundant because the parsing will - // fail anyway. Users who care about parsing errors would have already - // checked the return value and others may find the error log unexpected. - // - // `ctx == nullptr` means it's not eagerly verified (e.g. unverified lazy) - // and logging in that case makes sense. - if (ctx == nullptr) { - LogParseError(value); - } - return MessageState(value, RawState::kParseError); - } - return MessageState(value, maybe_uninitialized - ? RawState::kIsParsedMaybeUninitialized - : RawState::kIsParsed); - } - - // Mutable because it is initialized lazily. - // A MessageState is a tagged RepeatedPtrFieldBase* - mutable std::atomic raw_; - - // NOT mutable because we keep the payload around until the message changes in - // some way. - UnparsedPayload unparsed_; - // absl::Cord will make copies on anything under this limit, so we might as - // well do the copies into our own buffer instead. - static constexpr size_t kMaxArraySize = 512; - static_assert(kMaxArraySize <= - std::numeric_limits::max()); - friend class ::google::protobuf::Arena; - friend class ::google::protobuf::Reflection; - friend class ExtensionSet; - typedef void InternalArenaConstructable_; - typedef void DestructorSkippable_; - - // Logs a parsing error. - static void LogParseError(const RepeatedPtrFieldBase* value); - - bool IsAllocated() const { - return raw_.load(std::memory_order_acquire).value() != nullptr; - } - - // For testing purposes. - friend class LazyRepeatedPtrFieldTest; - friend class LazyRepeatedInMessageTest; - template - void OverwriteForTest(RawState status, const absl::Cord& unparsed, - RepeatedPtrField* value, Arena* arena); -}; - -inline LazyRepeatedPtrField::~LazyRepeatedPtrField() { - const auto* value = raw_.load(std::memory_order_relaxed).value(); - delete reinterpret_cast*>(value); - unparsed_.Destroy(); -} - -// TODO: Deduplicate with LazyField. -inline const RepeatedPtrFieldBase* LazyRepeatedPtrField::TryGetRepeated() - const { - switch (GetLogicalState()) { - case LogicalState::kDirty: - case LogicalState::kNoParseRequired: - case LogicalState::kParseRequired: - return raw_.load(std::memory_order_relaxed).value(); - case LogicalState::kClear: - case LogicalState::kClearExposed: - return nullptr; - } - internal::Unreachable(); - return nullptr; -} - -// ------------------------------------------------------------------- -// Testing stuff. - -// It's in the header due to the template. -// TODO: Deduplicate with LazyField. -template -void LazyRepeatedPtrField::OverwriteForTest(RawState status, - const absl::Cord& unparsed, - RepeatedPtrField* value, - Arena* arena) { - auto raw = raw_.load(std::memory_order_relaxed); - if (arena == nullptr) { - delete reinterpret_cast*>(raw.value()); - } - raw.set_value(reinterpret_cast(value)); - raw.set_status(status); - if (!unparsed.empty()) { - if (arena != nullptr && unparsed.size() <= kMaxArraySize) { - unparsed_.InitAndSetArray(arena, unparsed); - } else { - unparsed_.SetCord(arena, unparsed); - } - } - raw_.store(raw, std::memory_order_relaxed); -} - -} // namespace internal -} // namespace protobuf -} // namespace google - -#include "google/protobuf/port_undef.inc" - -#endif // GOOGLE_PROTOBUF_LAZY_REPEATED_FIELD_H__ diff --git a/src/google/protobuf/lazy_repeated_field_heavy.cc b/src/google/protobuf/lazy_repeated_field_heavy.cc deleted file mode 100644 index 62e7ac6b47e33..0000000000000 --- a/src/google/protobuf/lazy_repeated_field_heavy.cc +++ /dev/null @@ -1,401 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2023 Google Inc. All rights reserved. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file or at -// https://developers.google.com/open-source/licenses/bsd - -#include -#include -#include -#include -#include - -#include "absl/log/absl_check.h" -#include "absl/log/absl_log.h" -#include "absl/strings/cord.h" -#include "absl/strings/escaping.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_replace.h" -#include "absl/strings/string_view.h" -#include "google/protobuf/arena.h" -#include "google/protobuf/io/zero_copy_stream_impl_lite.h" -#include "google/protobuf/lazy_repeated_field.h" -#include "google/protobuf/message.h" -#include "google/protobuf/message_lite.h" -#include "google/protobuf/repeated_ptr_field.h" -#include "google/protobuf/wire_format_lite.h" - -namespace google { -namespace protobuf { -namespace internal { -namespace { - -class ByFactory { - public: - explicit ByFactory(const Descriptor* type, MessageFactory* factory) - : type_(type), factory_(factory) {} - - Message* New(Arena* arena) const { - return factory_->GetPrototype(type_)->New(arena); - } - - const Message& Default() const { return *factory_->GetPrototype(type_); } - - private: - const Descriptor* type_; - MessageFactory* factory_; -}; - -// Escape C++ trigraphs by escaping question marks to \? -std::string EscapeTrigraphs(absl::string_view to_escape) { - return absl::StrReplaceAll(to_escape, {{"?", "\\?"}}); -} - -std::string EscapeEncoded(absl::string_view encoded) { - std::string out; - out.reserve(encoded.size() * 2); - constexpr size_t kBytesPerLine = 25; - for (size_t i = 0; i < encoded.size(); i += kBytesPerLine) { - absl::StrAppend( - &out, "\"", - EscapeTrigraphs(absl::CEscape(encoded.substr(i, kBytesPerLine))), - "\"\n"); - } - return out; -} - -// Deterministic serialization is required to minimize false positives: e.g. -// ordering, redundant wire format data, etc. Such discrepancies are -// expected and tolerated. To prevent this serialization starts yet another -// consistency check, we should skip consistency-check. -std::string DeterministicSerialization(const google::protobuf::MessageLite& m) { - std::string result; - { - google::protobuf::io::StringOutputStream sink(&result); - google::protobuf::io::CodedOutputStream out(&sink); - out.SetSerializationDeterministic(true); - out.SkipCheckConsistency(); - m.SerializePartialToCodedStream(&out); - } - return result; -} - -// If LazyField is initialized, unparsed and message should be consistent. If -// a LazyField is mutated via const_cast, that may break. We should rather fail -// than silently propagate such discrepancy. Note that this aims to detect -// missing/added data. -void VerifyConsistency(LazyRepeatedPtrField::LogicalState state, - const RepeatedPtrFieldBase* value, - const MessageLite* prototype, const absl::Cord& unparsed, - io::EpsCopyOutputStream* stream) { -#ifndef NDEBUG - if (stream != nullptr && !stream->ShouldCheckConsistency()) return; - if (state != LazyRepeatedPtrField::LogicalState::kNoParseRequired) return; - - RepeatedPtrField unparsed_msg; - if (!LazyRepeatedPtrField::ParseWithOuterContext( - reinterpret_cast(&unparsed_msg), unparsed, - nullptr, prototype, /*set_missing_required=*/false)) { - // Bail out on parse failure as it can result in false positive - // inconsistency and ABSL_CHECK failure. Warn instead. - ABSL_LOG(WARNING) - << "Verify skipped due to parse failure: RepeatedPtrField of " - << prototype->GetTypeName(); - return; - } - - const auto* msgs = reinterpret_cast*>(value); - // Eagerly parse all lazy fields to eliminate non-canonical wireformat data. - for (int i = 0; i < msgs->size(); i++) { - // Clone a new one from the original to eagerly parse all lazy - // fields. - const auto& msg = msgs->Get(i); - std::unique_ptr clone(msg.New()); - clone->CopyFrom(msg); - EagerParseLazyFieldIgnoreUnparsed(*clone); - EagerParseLazyFieldIgnoreUnparsed(*unparsed_msg.Mutable(i)); - ABSL_DCHECK_EQ(DeterministicSerialization(*clone), - DeterministicSerialization(unparsed_msg.Get(i))) - << "RepeatedPtrField<" << msg.GetTypeName() << ">(" << i << ")" - << ": likely mutated via getters + const_cast\n" - << "unparsed:\n" - << EscapeEncoded(DeterministicSerialization(unparsed_msg.Get(i))) - << "\nmessage:\n" - << EscapeEncoded(DeterministicSerialization(*clone)); - } -#endif // !NDEBUG -} - -} // namespace - -LazyRepeatedPtrField::LazyRepeatedPtrField(Arena* arena, - const LazyRepeatedPtrField& rhs, - Arena* rhs_arena) - : raw_(MessageState(RawState::kCleared)) { - switch (rhs.GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: - return; // Leave uninitialized / empty - case LogicalState::kNoParseRequired: - case LogicalState::kParseRequired: { - rhs.unparsed_.Visit( - [] {}, // - [&](const auto& cord) { unparsed_.InitAsCord(arena, cord); }, - [&](auto view) { - if (arena == nullptr) { - unparsed_.InitAsCord(nullptr, view); - } else { - unparsed_.InitAndSetArray(arena, view); - } - }); - raw_.store( - MessageState(nullptr, rhs.MaybeUninitialized() - ? RawState::kNeedsParseMaybeUninitialized - : RawState::kNeedsParse), - std::memory_order_relaxed); - return; - } - case LogicalState::kDirty: { - MessageState state = rhs.raw_.load(std::memory_order_relaxed); - const auto* src = state.value(); - if (src->empty()) { - return; // Leave uninitialized / empty - } - // Retain the existing IsParsed or IsParsedMaybeUninitialized status. - // TODO: use copy construction. - auto new_state = state.status(); - auto* value = Arena::Create(arena); - // MergeFrom calls reserve. - value->MergeFrom(*src); - raw_.store(MessageState(value, new_state), std::memory_order_relaxed); - return; - } - } -} - -const RepeatedPtrFieldBase* LazyRepeatedPtrField::GetDynamic( - const Descriptor* type, MessageFactory* factory, Arena* arena) const { - return GetGeneric(ByFactory(type, factory), arena, nullptr); -} - -RepeatedPtrFieldBase* LazyRepeatedPtrField::MutableDynamic( - const Descriptor* type, MessageFactory* factory, Arena* arena) { - return MutableGeneric(ByFactory(type, factory), arena, nullptr); -} - -size_t LazyRepeatedPtrField::SpaceUsedExcludingSelfLong() const { - // absl::Cord::EstimatedMemoryUsage counts itself that should be excluded - // because sizeof(Cord) is already counted in self. - size_t total_size = unparsed_.SpaceUsedExcludingSelf(); - switch (GetLogicalState()) { - case LogicalState::kClearExposed: - case LogicalState::kNoParseRequired: - case LogicalState::kDirty: { - const auto* value = raw_.load(std::memory_order_relaxed).value(); - total_size += - value->SpaceUsedExcludingSelfLong>(); - } break; - case LogicalState::kClear: - case LogicalState::kParseRequired: - // We may have a `Message*` here, but we cannot safely access it - // because, a racing SharedInit could delete it out from under us. - // Other states in this structure are already passed kSharedInit and are - // thus safe. - break; // Nothing to add. - } - return total_size; -} - -template -bool LazyRepeatedPtrField::MergeFrom(const MessageLite* prototype, - const Input& data, Arena* arena) { - switch (GetLogicalState()) { - case LogicalState::kParseRequired: { - unparsed_.UpgradeToCord(arena).Append(data); - break; - } - case LogicalState::kClear: { - size_t num_bytes = data.size(); - ABSL_DCHECK(num_bytes > 0); - if (arena == nullptr || num_bytes > kMaxArraySize || unparsed_.IsCord()) { - unparsed_.SetCord(arena, data); - } else { - unparsed_.InitAndSetArray(arena, data); - } - SetNeedsParse(); - break; - } - - // Pointer was previously exposed merge into that object. - case LogicalState::kClearExposed: - case LogicalState::kNoParseRequired: - case LogicalState::kDirty: { - auto new_state = PerformTransition([&](ExclusiveTxn& txn) { - auto* value = txn.mutable_value(); - bool res = - ParseWithOuterContext(value, data, /*ctx=*/nullptr, prototype, - /*set_missing_required=*/false); - if (!res) { - LogParseError(value); - return RawState::kParseError; - } else { - return RawState::kIsParsed; - } - }); - return new_state == RawState::kIsParsed; - } - } - return true; -} - -void LazyRepeatedPtrField::MergeMaybeUninitializedState( - const LazyRepeatedPtrField& other) { - if (MaybeUninitialized() || !other.MaybeUninitialized()) return; - - switch (GetLogicalState()) { - case LogicalState::kParseRequired: - SetNeedsParseMaybeUninitialized(); - break; - case LogicalState::kNoParseRequired: - SetParseNotRequiredMaybeUninitialized(); - break; - default: - break; - } -} - -void LazyRepeatedPtrField::MergeFrom(const MessageLite* prototype, - const LazyRepeatedPtrField& other, - Arena* arena, Arena* other_arena) { -#ifndef NDEBUG - VerifyConsistency(other.GetLogicalState(), - other.raw_.load(std::memory_order_relaxed).value(), - prototype, other.unparsed_.ForceAsCord(), nullptr); -#endif // !NDEBUG - switch (other.GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: - return; // Nothing to do. - - case LogicalState::kParseRequired: - case LogicalState::kNoParseRequired: - if (other.unparsed_.IsCord()) { - MergeFrom(prototype, other.unparsed_.AsCord(), arena); - } else { - MergeFrom(prototype, other.unparsed_.AsStringView(), arena); - } - MergeMaybeUninitializedState(other); - return; - - case LogicalState::kDirty: { - const auto* other_value = - other.raw_.load(std::memory_order_relaxed).value(); - if (other_value->empty()) { - return; - } - auto* value = MutableByPrototype(prototype, arena); - value->MergeFrom(*other_value); - // No need to merge uninitialized state. - ABSL_DCHECK(GetLogicalState() == LogicalState::kDirty); - return; - } - } -} - -uint8_t* LazyRepeatedPtrField::InternalWrite( - const MessageLite* prototype, int32_t number, uint8_t* target, - io::EpsCopyOutputStream* stream) const { -#ifndef NDEBUG - VerifyConsistency(GetLogicalState(), - raw_.load(std::memory_order_relaxed).value(), prototype, - unparsed_.ForceAsCord(), stream); -#endif // !NDEBUG - switch (GetLogicalState()) { - case LogicalState::kClear: - case LogicalState::kClearExposed: - case LogicalState::kNoParseRequired: - case LogicalState::kParseRequired: - // If deterministic is enabled then attempt to parse to a message which - // can then be serialized deterministically. (The serialized bytes may - // have been created undeterministically). - if (stream->IsSerializationDeterministic() && prototype != nullptr) { - RepeatedPtrField value; - // TODO: Test this path. - bool success = unparsed_.Visit( - [] { return true; }, - [&](const auto& cord) { - // `set_missing_required = false` to avoid checking require fields - // (similar to Message::ParsePartial*). - return ParseWithOuterContext( - reinterpret_cast(&value), cord, - /*ctx=*/nullptr, prototype, /*set_missing_required=*/false); - }, - [&](auto view) { - return ParseWithOuterContext( - reinterpret_cast(&value), view, - /*ctx=*/nullptr, prototype, false); - }); - if (success) { - size_t tag_size = WireFormatLite::TagSize( - number, WireFormatLite::FieldType::TYPE_MESSAGE); - auto count = tag_size * value.size(); - for (int i = 0; i < value.size(); i++) { - count += WireFormatLite::LengthDelimitedSize( - value.Get(i).ByteSizeLong()); - } - - // Serialization takes place in two phases: - // 1) Figure out the expected number of bytes (e.g. ByteSizeLong() on - // the container message) 2) InternalWrite the bytes. - // - // There is a golden contract that the # of bytes written matches - // the returned value from the first step. - // - // In this case unparsed_ was used as the source of truth for the - // number of bytes. There are some known cases where the number of - // serialized bytes is different than the number of bytes written - // by a message parsed from the serialized bytes. For example if the - // serialized representation contained multiple entries for the same - // non-repeated field the duplicates are removed upon parsing. - // - // If this (relatively rare) case is hit then there is no choice - // but to serialize the original unparsed bytes; otherwise the - // golden contract is broken. - // It's possible for the size to change if the unparsed_ was not - // canonical, for example it can have repeated entries for the same - // tag (this is more common then you would think). - if (count == unparsed_.Size()) { - for (int i = 0, n = value.size(); i < n; i++) { - const auto& repfield = value.Get(i); - target = WireFormatLite::InternalWriteMessage( - number, repfield, repfield.GetCachedSize(), target, stream); - } - return target; - } - } - } - return unparsed_.Visit( - [&] { return target; }, - [&](const auto& cord) { return stream->WriteCord(cord, target); }, - [&](auto view) { - return stream->WriteRaw(view.data(), view.size(), target); - }); - case LogicalState::kDirty: { - const auto* value = raw_.load(std::memory_order_relaxed).value(); - for (int i = 0, n = value->size(); i < n; i++) { - const auto& repfield = value->Get>(i); - target = WireFormatLite::InternalWriteMessage( - number, repfield, repfield.GetCachedSize(), target, stream); - } - return target; - } - } - // Required for certain compiler configurations. - ABSL_LOG(FATAL) << "Not reachable"; - return nullptr; -} - -} // namespace internal -} // namespace protobuf -} // namespace google diff --git a/src/google/protobuf/repeated_ptr_field.h b/src/google/protobuf/repeated_ptr_field.h index 7b08a245f1017..36ca1c65c954b 100644 --- a/src/google/protobuf/repeated_ptr_field.h +++ b/src/google/protobuf/repeated_ptr_field.h @@ -579,7 +579,6 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase { // subclass. friend class google::protobuf::Reflection; friend class internal::SwapFieldHelper; - friend class LazyRepeatedPtrField; // Concrete Arena enabled copy function used to copy messages instances. // This follows the `Arena::CopyConstruct` signature so that the compiler