Skip to content

Commit

Permalink
C#: Optimize JSON parsing in JsonTokenizer
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 618097513
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Mar 22, 2024
1 parent b361c9c commit 68e6e3e
Showing 1 changed file with 70 additions and 47 deletions.
117 changes: 70 additions & 47 deletions csharp/src/Google.Protobuf/JsonTokenizer.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#region Copyright notice and license
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
Expand Down Expand Up @@ -204,14 +204,13 @@ protected override JsonToken NextImpl()
while (true)
{
var next = reader.Read();
if (next == null)
{
ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
state = State.ReaderExhausted;
return JsonToken.EndDocument;
}
switch (next.Value)
switch (next)
{
case -1:
ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
state = State.ReaderExhausted;
return JsonToken.EndDocument;

// Skip whitespace between tokens
case ' ':
case '\t':
Expand Down Expand Up @@ -279,11 +278,11 @@ protected override JsonToken NextImpl()
case '7':
case '8':
case '9':
double number = ReadNumber(next.Value);
double number = ReadNumber((char) next);
ValidateAndModifyStateForValue("Invalid state to read a number token: ");
return JsonToken.Value(number);
default:
throw new InvalidJsonException("Invalid first character of token: " + next.Value);
throw new InvalidJsonException($"Invalid first character of token: {(char) next}");
}
}
}
Expand Down Expand Up @@ -395,14 +394,15 @@ private void ConsumeLiteral(string text)
{
for (int i = 1; i < text.Length; i++)
{
char? next = reader.Read();
if (next == null)
{
throw reader.CreateException("Unexpected end of text while reading literal token " + text);
}
if (next.Value != text[i])
int next = reader.Read();
if (next != text[i])
{
throw reader.CreateException("Unexpected character while reading literal token " + text);
// Only check for "end of text" when we've detected that the character differs from the
// expected one.
var message = next == -1
? $"Unexpected end of text while reading literal token {text}"
: $"Unexpected character while reading literal token {text}";
throw reader.CreateException(message);
}
}
}
Expand All @@ -422,7 +422,7 @@ private double ReadNumber(char initialCharacter)
// Each method returns the character it read that doesn't belong in that part,
// so we know what to do next, including pushing the character back at the end.
// -1 is returned for "end of text".
char? next = ReadInt(builder);
int next = ReadInt(builder);
if (next == '.')
{
next = ReadFrac(builder);
Expand All @@ -433,9 +433,9 @@ private double ReadNumber(char initialCharacter)
}
// If we read a character which wasn't part of the number, push it back so we can read it again
// to parse the next token.
if (next != null)
if (next != -1)
{
reader.PushBack(next.Value);
reader.PushBack((char) next);
}

// TODO: What exception should we throw if the value can't be represented as a double?
Expand All @@ -461,48 +461,63 @@ private double ReadNumber(char initialCharacter)
}
}

private char? ReadInt(StringBuilder builder)
/// <summary>
/// Copies an integer into a StringBuilder.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the integer, or -1 for end-of-text.</returns>
private int ReadInt(StringBuilder builder)
{
// At least one digit is required: end-of-text or a non-digit here is an invalid literal.
char first = reader.ReadOrFail("Invalid numeric literal");
if (first < '0' || first > '9')
{
throw reader.CreateException("Invalid numeric literal");
}
builder.Append(first);
char? next = ConsumeDigits(builder, out int digitCount);
int next = ConsumeDigits(builder, out int digitCount);
// digitCount only counts the digits after the first one, so a leading '0'
// followed by any further digits is rejected.
if (first == '0' && digitCount != 0)
{
throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
}
return next;
}

private char? ReadFrac(StringBuilder builder)
/// <summary>
/// Copies the fractional part of a number into a StringBuilder, assuming reader is positioned after a period.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the fractional part, or -1 for end-of-text.</returns>
private int ReadFrac(StringBuilder builder)
{
builder.Append('.'); // Already consumed this
char? next = ConsumeDigits(builder, out int digitCount);
int next = ConsumeDigits(builder, out int digitCount);
// A period must be followed by at least one digit.
if (digitCount == 0)
{
throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
}
return next;
}

private char? ReadExp(StringBuilder builder)
/// <summary>
/// Copies the exponent part of a number into a StringBuilder, with an assumption that the reader is already positioned after the "e".
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <returns>The character following the exponent, or -1 for end-of-text.</returns>
private int ReadExp(StringBuilder builder)
{
builder.Append('E'); // Already consumed this (or 'e')
char? next = reader.Read();
if (next == null)
int next = reader.Read();
if (next == -1)
{
throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
}
if (next == '-' || next == '+')
{
builder.Append(next.Value);
builder.Append((char) next);
}
else
{
reader.PushBack(next.Value);
reader.PushBack((char) next);
}
next = ConsumeDigits(builder, out int digitCount);
if (digitCount == 0)
Expand All @@ -512,18 +527,24 @@ private double ReadNumber(char initialCharacter)
return next;
}

private char? ConsumeDigits(StringBuilder builder, out int count)
/// <summary>
/// Copies a sequence of digits into a StringBuilder.
/// </summary>
/// <param name="builder">The builder to read the number into</param>
/// <param name="count">The number of digits appended to the builder</param>
/// <returns>The character following the digits, or -1 for end-of-text.</returns>
private int ConsumeDigits(StringBuilder builder, out int count)
{
count = 0;
while (true)
{
char? next = reader.Read();
if (next == null || next.Value < '0' || next.Value > '9')
int next = reader.Read();
// Stop at the first non-digit (or end-of-text). The terminating character has been
// consumed from the reader and is returned, not pushed back, so the caller decides
// what to do with it.
if (next == -1 || next < '0' || next > '9')
{
return next;
}
count++;
builder.Append(next.Value);
builder.Append((char) next);
}
}

Expand Down Expand Up @@ -683,39 +704,41 @@ internal PushBackReader(TextReader reader)
}

/// <summary>
/// The buffered next character, if we have one.
/// The buffered next character, if we have one, or -1 if there is no buffered character.
/// </summary>
private char? nextChar;
private int nextChar = -1;

/// <summary>
/// Returns the next character in the stream, or null if we have reached the end.
/// Returns the next character in the stream, or -1 if we have reached the end of the stream.
/// </summary>
/// <returns>The buffered (pushed-back) character if there is one, otherwise the result of reading from the underlying reader.</returns>
internal char? Read()
internal int Read()
{
// A pushed-back character takes priority and is handed out exactly once.
if (nextChar != null)
if (nextChar != -1)
{
char? tmp = nextChar;
nextChar = null;
int tmp = nextChar;
nextChar = -1;
return tmp;
}
int next = reader.Read();
return next == -1 ? null : (char?) next;
return reader.Read();
}

/// <summary>
/// Reads the next character (a pushed-back character first, if one is buffered, otherwise from
/// the underlying reader), throwing an <see cref="InvalidJsonException" />
/// with the specified message if there are no more characters available.
/// </summary>
/// <param name="messageOnFailure">The message passed to CreateException when no character is available</param>
/// <returns>The character that was read.</returns>
internal char ReadOrFail(string messageOnFailure)
{
char? next = Read();
if (next == null)
int next = Read();
if (next == -1)
{
throw CreateException(messageOnFailure);
}
return next.Value;
return (char) next;
}

internal void PushBack(char c)
{
if (nextChar != null)
if (nextChar != -1)
{
throw new InvalidOperationException("Cannot push back when already buffering a character");
}
Expand Down

0 comments on commit 68e6e3e

Please sign in to comment.