Skip to content

Commit

Permalink
Enable Text format parser to skip unknown short-formed repeated fields.
Browse files Browse the repository at this point in the history
The text format spec states that short-formed repeated fields are valid. For example:
```
repeated_field: [{}, {}]
```
However, this was not supported when the text format parser tried to skip **unknown fields**.
PiperOrigin-RevId: 493085702
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Dec 5, 2022
1 parent d01a912 commit 6dbd413
Showing 1 changed file with 55 additions and 23 deletions.
78 changes: 55 additions & 23 deletions java/core/src/main/java/com/google/protobuf/TextFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -1893,18 +1893,8 @@ private void mergeField(

// Skips unknown fields.
if (field == null) {
// Try to guess the type of this field.
// If this field is not a message, there should be a ":" between the
// field name and the field value and also the field value should not
// start with "{" or "<" which indicates the beginning of a message body.
// If there is no ":" or there is a "{" or "<" after ":", this field has
// to be a message or the input is ill-formed.
detectSilentMarker(tokenizer, type, name);
if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
skipFieldValue(tokenizer);
} else {
skipFieldMessage(tokenizer, type);
}
guessFieldTypeAndSkip(tokenizer, type);
return;
}

Expand Down Expand Up @@ -2263,19 +2253,9 @@ private void mergeAnyFieldValue(
/** Skips the next field including the field's name and value. */
private void skipField(Tokenizer tokenizer, Descriptor type) throws ParseException {
String name = consumeFullTypeName(tokenizer);

// Try to guess the type of this field.
// If this field is not a message, there should be a ":" between the
// field name and the field value and also the field value should not
// start with "{" or "<" which indicates the beginning of a message body.
// If there is no ":" or there is a "{" or "<" after ":", this field has
// to be a message or the input is ill-formed.
detectSilentMarker(tokenizer, type, name);
if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) {
skipFieldValue(tokenizer);
} else {
skipFieldMessage(tokenizer, type);
}
guessFieldTypeAndSkip(tokenizer, type);

// For historical reasons, fields may optionally be separated by commas or
// semicolons.
if (!tokenizer.tryConsume(";")) {
Expand Down Expand Up @@ -2314,6 +2294,58 @@ private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
}
}

/**
 * Guesses what kind of value follows an unknown field's name and skips over it.
 *
 * <p>An optional ":" after the field name is consumed first. The value is then classified as:
 *
 * <ul>
 *   <li>a short-formed repeated field, when the next token is "[". Whether scalar elements are
 *       accepted inside the brackets depends on whether the ":" was present;
 *   <li>a scalar value, when the ":" was present and the next token is neither "{" nor "<";
 *   <li>a message body otherwise (no ":" at all, or a "{" / "<" following it). If the input is
 *       not actually a message at that point, it is ill-formed.
 * </ul>
 */
private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type) throws ParseException {
  final boolean colonConsumed = tokenizer.tryConsume(":");

  if (tokenizer.lookingAt("[")) {
    // Short repeated form. With a preceding colon the elements may be scalars or messages;
    // without one, only message elements are acceptable.
    skipFieldShortFormedRepeated(tokenizer, colonConsumed, type);
    return;
  }

  // Only peek at the next token when a colon was seen; a missing colon already implies a message.
  final boolean scalarValue =
      colonConsumed && !(tokenizer.lookingAt("{") || tokenizer.lookingAt("<"));
  if (scalarValue) {
    skipFieldValue(tokenizer);
  } else {
    skipFieldMessage(tokenizer, type);
  }
}

/**
 * Skips the value of a repeated field written in short form, i.e. a bracketed, comma-separated
 * list of elements.
 *
 * <p>Does nothing if the next token is not "[". An empty list "[]" is accepted. Each element
 * that starts with "{" or "<" is skipped as a message; any other element is skipped as a scalar
 * value, but only when {@code scalarAllowed} is true.
 *
 * @param tokenizer the tokenizer positioned at the opening "["
 * @param scalarAllowed whether scalar elements may appear inside the brackets
 * @param type the descriptor passed through to {@code skipFieldMessage} for message elements
 * @throws ParseException if a scalar element is found while {@code scalarAllowed} is false, or
 *     if an element is followed by something other than "]" or ","
 */
private void skipFieldShortFormedRepeated(
    Tokenizer tokenizer, boolean scalarAllowed, Descriptor type) throws ParseException {
  if (!tokenizer.tryConsume("[")) {
    // Not a short-formed list; nothing to skip here.
    return;
  }
  if (tokenizer.tryConsume("]")) {
    // Empty list: "[]".
    return;
  }

  boolean listClosed;
  do {
    final boolean messageElement = tokenizer.lookingAt("{") || tokenizer.lookingAt("<");
    if (messageElement) {
      skipFieldMessage(tokenizer, type);
    } else if (!scalarAllowed) {
      throw tokenizer.parseException(
          "Invalid repeated scalar field: missing \":\" before \"[\".");
    } else {
      skipFieldValue(tokenizer);
    }

    // After every element expect either the closing bracket or a separating comma.
    listClosed = tokenizer.tryConsume("]");
    if (!listClosed) {
      tokenizer.consume(",");
    }
  } while (!listClosed);
}
}

// =================================================================
Expand Down

0 comments on commit 6dbd413

Please sign in to comment.