From 68e6e3e8eecd31b0078422f688a3f01fb881d367 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Fri, 22 Mar 2024 01:24:05 -0700 Subject: [PATCH] C#: Optimize JSON parsing in JsonTokenizer PiperOrigin-RevId: 618097513 --- csharp/src/Google.Protobuf/JsonTokenizer.cs | 117 ++++++++++++-------- 1 file changed, 70 insertions(+), 47 deletions(-) diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs index d80eed98bddf..f89e36adf4df 100644 --- a/csharp/src/Google.Protobuf/JsonTokenizer.cs +++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs @@ -1,4 +1,4 @@ -#region Copyright notice and license +#region Copyright notice and license // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // @@ -204,14 +204,13 @@ protected override JsonToken NextImpl() while (true) { var next = reader.Read(); - if (next == null) - { - ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); - state = State.ReaderExhausted; - return JsonToken.EndDocument; - } - switch (next.Value) + switch (next) { + case -1: + ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); + state = State.ReaderExhausted; + return JsonToken.EndDocument; + // Skip whitespace between tokens case ' ': case '\t': @@ -279,11 +278,11 @@ protected override JsonToken NextImpl() case '7': case '8': case '9': - double number = ReadNumber(next.Value); + double number = ReadNumber((char) next); ValidateAndModifyStateForValue("Invalid state to read a number token: "); return JsonToken.Value(number); default: - throw new InvalidJsonException("Invalid first character of token: " + next.Value); + throw new InvalidJsonException($"Invalid first character of token: {(char) next}"); } } } @@ -395,14 +394,15 @@ private void ConsumeLiteral(string text) { for (int i = 1; i < text.Length; i++) { - char? next = reader.Read(); - if (next == null) - { - throw reader.CreateException("Unexpected end of text while reading literal token " + text); - } - if (next.Value != text[i]) + int next = reader.Read(); + if (next != text[i]) { - throw reader.CreateException("Unexpected character while reading literal token " + text); + // Only check for "end of text" when we've detected that the character differs from the + // expected one. + var message = next == -1 + ? $"Unexpected end of text while reading literal token {text}" + : $"Unexpected character while reading literal token {text}"; + throw reader.CreateException(message); } } } @@ -422,7 +422,7 @@ private double ReadNumber(char initialCharacter) // Each method returns the character it read that doesn't belong in that part, // so we know what to do next, including pushing the character back at the end. // null is returned for "end of text". - char? next = ReadInt(builder); + int next = ReadInt(builder); if (next == '.') { next = ReadFrac(builder); @@ -433,9 +433,9 @@ private double ReadNumber(char initialCharacter) } // If we read a character which wasn't part of the number, push it back so we can read it again // to parse the next token. - if (next != null) + if (next != -1) { - reader.PushBack(next.Value); + reader.PushBack((char) next); } // TODO: What exception should we throw if the value can't be represented as a double? @@ -461,7 +461,12 @@ private double ReadNumber(char initialCharacter) } } - private char? ReadInt(StringBuilder builder) + /// + /// Copies an integer into a StringBuilder. + /// + /// The builder to read the number into + /// The character following the integer, or -1 for end-of-text. + private int ReadInt(StringBuilder builder) { char first = reader.ReadOrFail("Invalid numeric literal"); if (first < '0' || first > '9') @@ -469,7 +474,7 @@ private double ReadNumber(char initialCharacter) throw reader.CreateException("Invalid numeric literal"); } builder.Append(first); - char? next = ConsumeDigits(builder, out int digitCount); + int next = ConsumeDigits(builder, out int digitCount); if (first == '0' && digitCount != 0) { throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); @@ -477,10 +482,15 @@ private double ReadNumber(char initialCharacter) return next; } - private char? ReadFrac(StringBuilder builder) + /// + /// Copies the fractional part of an integer into a StringBuilder, assuming reader is positioned after a period. + /// + /// The builder to read the number into + /// The character following the fractional part, or -1 for end-of-text. + private int ReadFrac(StringBuilder builder) { builder.Append('.'); // Already consumed this - char? next = ConsumeDigits(builder, out int digitCount); + int next = ConsumeDigits(builder, out int digitCount); if (digitCount == 0) { throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); @@ -488,21 +498,26 @@ private double ReadNumber(char initialCharacter) return next; } - private char? ReadExp(StringBuilder builder) + /// + /// Copies the exponent part of a number into a StringBuilder, with an assumption that the reader is already positioned after the "e". + /// + /// The builder to read the number into + /// The character following the exponent, or -1 for end-of-text. + private int ReadExp(StringBuilder builder) { builder.Append('E'); // Already consumed this (or 'e') - char? next = reader.Read(); - if (next == null) + int next = reader.Read(); + if (next == -1) { throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); } if (next == '-' || next == '+') { - builder.Append(next.Value); + builder.Append((char) next); } else { - reader.PushBack(next.Value); + reader.PushBack((char) next); } next = ConsumeDigits(builder, out int digitCount); if (digitCount == 0) @@ -512,18 +527,24 @@ private double ReadNumber(char initialCharacter) return next; } - private char? ConsumeDigits(StringBuilder builder, out int count) + /// + /// Copies a sequence of digits into a StringBuilder. + /// + /// The builder to read the number into + /// The number of digits appended to the builder + /// The character following the digits, or -1 for end-of-text. + private int ConsumeDigits(StringBuilder builder, out int count) { count = 0; while (true) { - char? next = reader.Read(); - if (next == null || next.Value < '0' || next.Value > '9') + int next = reader.Read(); + if (next == -1 || next < '0' || next > '9') { return next; } count++; - builder.Append(next.Value); + builder.Append((char) next); } } @@ -683,39 +704,41 @@ internal PushBackReader(TextReader reader) } /// - /// The buffered next character, if we have one. + /// The buffered next character, if we have one, or -1 if there is no buffered character. /// - private char? nextChar; + private int nextChar = -1; /// - /// Returns the next character in the stream, or null if we have reached the end. + /// Returns the next character in the stream, or -1 if we have reached the end of the stream. /// - /// - internal char? Read() + internal int Read() { - if (nextChar != null) + if (nextChar != -1) { - char? tmp = nextChar; - nextChar = null; + int tmp = nextChar; + nextChar = -1; return tmp; } - int next = reader.Read(); - return next == -1 ? null : (char?) next; + return reader.Read(); } + /// + /// Reads the next character from the underlying reader, throwing an + /// with the specified message if there are no more characters available. + /// internal char ReadOrFail(string messageOnFailure) { - char? next = Read(); - if (next == null) + int next = Read(); + if (next == -1) { throw CreateException(messageOnFailure); } - return next.Value; + return (char) next; } internal void PushBack(char c) { - if (nextChar != null) + if (nextChar != -1) { throw new InvalidOperationException("Cannot push back when already buffering a character"); }